-
Notifications
You must be signed in to change notification settings - Fork 948
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2414 from krao14/krao14-feature-kendra-bedrock-cd…
…k-python New serverless pattern-lambda-kendra-bedrock-cdk-python
- Loading branch information
Showing
9 changed files
with
591 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# AWS Lambda to Amazon Kendra to Amazon Bedrock | ||
|
||
This pattern contains a sample stack that uses an AWS Lambda function to retrieve documents from an Amazon Kendra index and then pass them to Amazon Bedrock to generate a response. The pattern includes usage of the Amazon S3 data source connector. | ||
|
||
Important: this application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the AWS Pricing page for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. | ||
|
||
## Requirements | ||
* [Create an AWS account](https://portal.aws.amazon.com/gp/aws/developer/registration/index.html) if you do not already have one and log in. The IAM user that you use must have sufficient permissions to make necessary AWS service calls and manage AWS resources. | ||
* [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) installed and configured | ||
* [Git Installed](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) | ||
* [AWS CDK CLI](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) (AWS CDK) installed | ||
* [Request Amazon Bedrock Model Access for Anthropic Claude models on Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) | ||
* [Create an S3 Bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/creating-bucket.html) and [upload documents](https://docs.aws.amazon.com/AmazonS3/latest/userguide/upload-objects.html) that you want to be indexed. If you already have an S3 bucket with data that you want to crawl, you can skip this step. Note down the name of the S3 bucket for later use. | ||
|
||
## Deployment Instructions | ||
1. Create a new directory, navigate to that directory in a terminal and clone the GitHub repository: | ||
``` | ||
git clone https://github.com/aws-samples/serverless-patterns | ||
``` | ||
1. Change directory to the pattern directory: | ||
``` | ||
cd serverless-patterns/lambda-kendra-bedrock-cdk-python | ||
``` | ||
1. From the command line, use AWS CDK to deploy the AWS resources for the pattern as specified in the app.py file: | ||
``` | ||
cdk deploy --parameters S3DSBucketName=${YourS3BucketName} | ||
``` | ||
1. Note the outputs from the CDK deployment process. These contain the resource names and/or ARNs which are used for testing. | ||
## How it works | ||
Please refer to the architecture diagram below: | ||
![End to End Architecture](images/architecture.png) | ||
Here's a breakdown of the steps: | ||
**AWS Lambda:** Two AWS Lambda functions are created. `DataSourceSync` crawls and indexes the content. `InvokeBedrockLambda` invokes the specified model by passing the retrieved content from Amazon Kendra as context to the generative AI model. | ||
**Amazon Kendra:** An Amazon Kendra index is created with an S3 data source connector. When the `InvokeBedrockLambda` function is called, documents are retrieved from the Amazon Kendra index. | ||
**Amazon Bedrock:** Documents retrieved from the Amazon Kendra index are sent to Amazon Bedrock which responds with a generated response. | ||
## Testing | ||
CLI Lambda invoke with test event: | ||
```bash | ||
payload_base64=$(echo -n '{"question": "Value"}' | base64) | ||
aws lambda invoke \ | ||
--function-name INVOKE_LAMBDA_FUNCTION_ARN \ | ||
--payload "$payload_base64" \ | ||
output.txt | ||
``` | ||
|
||
The output.txt will contain the response generated by Amazon Bedrock. | ||
|
||
Example JSON Lambda test event: | ||
|
||
``` | ||
{ | ||
"question": "Value" | ||
} | ||
``` | ||
|
||
## Cleanup | ||
|
||
1. Delete the stack | ||
```bash | ||
cdk destroy | ||
``` | ||
---- | ||
Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
|
||
SPDX-License-Identifier: MIT-0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
#!/usr/bin/env python3 | ||
import os | ||
|
||
import aws_cdk as cdk | ||
|
||
from aws_cdk import ( | ||
# Duration, | ||
Stack, | ||
aws_iam as iam, | ||
aws_kendra as kendra, | ||
aws_lambda as lambda_, | ||
aws_s3 as s3, | ||
RemovalPolicy, | ||
Duration, | ||
CfnParameter, | ||
CfnOutput, | ||
triggers | ||
|
||
) | ||
|
||
from constructs import Construct | ||
|
||
class BedrockKendraStack(Stack):
    """CDK stack wiring AWS Lambda, Amazon Kendra and Amazon Bedrock together.

    Resources defined by this stack:

    * a Kendra index and an S3 data source attached to it, each with its own
      IAM role;
    * ``DataSourceSyncLambda`` (code in ``src/dataSourceSync``), which is
      invoked through a CDK ``Trigger`` and receives the index and data source
      ids via environment variables;
    * ``InvokeBedrockLambda`` (code in ``src/invokeBedrockLambda``), whose role
      allows ``kendra:Retrieve`` on the index and ``bedrock:InvokeModel`` on
      the selected foundation model.

    CloudFormation parameters:

    * ``ModelId`` - Anthropic model id (restricted to an allow-list).
    * ``S3DSBucketName`` - name of the existing bucket to be indexed (required).
    * ``KendraEdition`` - ``DEVELOPER_EDITION`` or ``ENTERPRISE_EDITION``.
    """

    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        """Define all parameters, roles, Kendra resources, Lambdas and outputs.

        Args:
            scope: Parent construct (normally the CDK ``App``).
            construct_id: Logical id of the stack; also used as the prefix of
                the Kendra role, index and data source names.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, construct_id, **kwargs)

        # ------------------------------------------------------------------
        # CloudFormation parameters
        # ------------------------------------------------------------------
        model_id_param = CfnParameter(
            self, "ModelId",
            type="String",
            default="anthropic.claude-instant-v1",
            allowed_values=[
                "anthropic.claude-instant-v1",
                "anthropic.claude-3-sonnet-20240229-v1:0",
                "anthropic.claude-3-haiku-20240307-v1:0",
                "anthropic.claude-v2",
            ],
            description="Enter the Model Id of the Anthropic LLM",
        )

        s3_bucket_name_param = CfnParameter(
            self, "S3DSBucketName",
            type="String",
            description="Enter the S3 bucket name where the contents you want to be indexed are stored.",
        )

        kendra_edition_param = CfnParameter(
            self, "KendraEdition",
            type="String",
            default="DEVELOPER_EDITION",
            allowed_values=[
                "DEVELOPER_EDITION",
                "ENTERPRISE_EDITION",
            ],
            description="Kendra edition (DEVELOPER_EDITION, ENTERPRISE_EDITION)",
        )

        # These are deploy-time tokens, not concrete strings: they are only
        # resolved by CloudFormation, so they can be interpolated but not
        # inspected during synthesis.
        model_id = model_id_param.value_as_string
        s3_bucket_name = s3_bucket_name_param.value_as_string
        kendra_edition = kendra_edition_param.value_as_string

        # Build ARNs from the stack's partition token instead of a literal
        # "aws" so the policies also match in other partitions (cdk.json lists
        # "aws-cn" as a target partition).
        bucket_arn = f"arn:{self.partition}:s3:::{s3_bucket_name}"
        model_arn = (
            f"arn:{self.partition}:bedrock:{self.region}::foundation-model/{model_id}"
        )

        # ------------------------------------------------------------------
        # Kendra index
        # ------------------------------------------------------------------
        kendra_index_role = iam.Role(
            self, "KendraIndexRole",
            assumed_by=iam.ServicePrincipal("kendra.amazonaws.com"),
            role_name=f"{construct_id}-KendraIndexRole",
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("CloudWatchLogsFullAccess"),
            ],
        )

        kendra_index = kendra.CfnIndex(
            self, "KendraIndex",
            name=f"{construct_id}-KendraIndex",
            role_arn=kendra_index_role.role_arn,
            edition=kendra_edition,
        )

        # ------------------------------------------------------------------
        # Kendra S3 data source
        # ------------------------------------------------------------------
        # The data source role may write/delete documents in the index and
        # read (list + get) objects from the bucket being crawled.
        kendra_ds_role = iam.Role(
            self, "KendraDSRole",
            assumed_by=iam.ServicePrincipal("kendra.amazonaws.com"),
            role_name=f"{construct_id}-DocsDSRole",
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("CloudWatchLogsFullAccess"),
            ],
            inline_policies={
                "KendraDataSourcePolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=["kendra:BatchPutDocument", "kendra:BatchDeleteDocument"],
                            resources=[kendra_index.attr_arn],
                        ),
                    ],
                ),
                "S3DataSourcePolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=["s3:GetObject"],
                            resources=[f"{bucket_arn}/*"],
                        ),
                    ],
                ),
                "ListBucketPolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=["s3:ListBucket"],
                            resources=[bucket_arn],
                        ),
                    ],
                ),
            },
        )

        kendra_ds = kendra.CfnDataSource(
            self, "KendraDS",
            index_id=kendra_index.attr_id,
            name=f"{construct_id}-KendraS3DS",
            type='S3',
            data_source_configuration=kendra.CfnDataSource.DataSourceConfigurationProperty(
                s3_configuration=kendra.CfnDataSource.S3DataSourceConfigurationProperty(
                    bucket_name=s3_bucket_name,
                ),
            ),
            role_arn=kendra_ds_role.role_arn,
        )

        # Explicit ordering: the data source is created after the index.
        kendra_ds.node.add_dependency(kendra_index)

        # ------------------------------------------------------------------
        # Data source sync Lambda (invoked via a CDK Trigger)
        # ------------------------------------------------------------------
        # NOTE(review): CloudWatchLogsFullAccess is broader than a function
        # needs for its own logging; consider
        # service-role/AWSLambdaBasicExecutionRole once confirmed the handlers
        # need nothing else.
        data_source_sync_lambda_role = iam.Role(
            self, "DataSourceSyncLambdaRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("CloudWatchLogsFullAccess"),
            ],
            inline_policies={
                "KendraDataSourceSyncPolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=[
                                "kendra:StartDataSourceSyncJob",
                                "kendra:StopDataSourceSyncJob",
                            ],
                            # The index itself plus everything nested under it.
                            resources=[
                                kendra_index.attr_arn,
                                f"{kendra_index.attr_arn}/*",
                            ],
                        ),
                    ],
                ),
            },
        )

        data_source_sync_lambda = lambda_.Function(
            self, "DataSourceSyncLambda",
            runtime=lambda_.Runtime.PYTHON_3_12,
            code=lambda_.Code.from_asset("src/dataSourceSync"),
            handler="dataSourceSyncLambda.lambda_handler",
            timeout=Duration.minutes(15),
            memory_size=1024,
            role=data_source_sync_lambda_role,
            environment={
                "INDEX_ID": kendra_index.attr_id,
                "DS_ID": kendra_ds.attr_id,
            },
        )

        # Invoke the sync function with the asynchronous (EVENT) invocation
        # type; the construct is created for its side effect only.
        triggers.Trigger(
            self, "data_source_sync_lambda_trigger",
            handler=data_source_sync_lambda,
            timeout=Duration.minutes(10),
            invocation_type=triggers.InvocationType.EVENT,
        )

        # ------------------------------------------------------------------
        # Bedrock invocation Lambda
        # ------------------------------------------------------------------
        invoke_bedrock_lambda_role = iam.Role(
            self, "InvokeBedRockLambdaRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("CloudWatchLogsFullAccess"),
            ],
            inline_policies={
                "InvokeBedRockPolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=["bedrock:InvokeModel"],
                            # Scoped to the single model chosen via ModelId.
                            resources=[model_arn],
                        ),
                    ],
                ),
                "KendraRetrievalPolicy": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            actions=["kendra:Retrieve"],
                            resources=[kendra_index.attr_arn],
                        ),
                    ],
                ),
            },
        )

        invoke_bedrock_lambda = lambda_.Function(
            self, "InvokeBedrockLambda",
            runtime=lambda_.Runtime.PYTHON_3_12,
            code=lambda_.Code.from_asset("src/invokeBedrockLambda"),
            handler="invokeBedrockLambda.lambda_handler",
            timeout=Duration.seconds(120),
            memory_size=3008,
            role=invoke_bedrock_lambda_role,
            tracing=lambda_.Tracing.ACTIVE,
            environment={
                "INDEX_ID": kendra_index.attr_id,
                "MODEL_ID": model_id,
            },
        )

        # ------------------------------------------------------------------
        # Stack outputs
        # ------------------------------------------------------------------
        CfnOutput(self, "KendraIndexRoleArn", value=kendra_index_role.role_arn, description="Kendra index role ARN")
        CfnOutput(self, "KendraIndexID", value=kendra_index.attr_id, description="Kendra index ID")
        CfnOutput(self, "KendraS3DataSourceArn", value=kendra_ds.attr_arn, description="Kendra S3 data source ARN")
        CfnOutput(self, "DataSourceSyncLambdaArn", value=data_source_sync_lambda.function_arn, description="Data source sync lambda function ARN")
        CfnOutput(self, "InvokeBedrockLambdaArn", value=invoke_bedrock_lambda.function_arn, description="Invoke bedrock lambda function ARN")
|
||
# Script entry point (cdk.json runs this file as `python3 app.py`): create the
# CDK app, attach the single stack to it, then synthesize the cloud assembly.
app = cdk.App()
BedrockKendraStack(scope=app, construct_id="BedrockKendraStack")

app.synth()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{ | ||
"app": "python3 app.py", | ||
"watch": { | ||
"include": [ | ||
"**" | ||
], | ||
"exclude": [ | ||
"README.md", | ||
"cdk*.json", | ||
"requirements*.txt", | ||
"source.bat", | ||
"**/__init__.py", | ||
"**/__pycache__", | ||
"tests" | ||
] | ||
}, | ||
"context": { | ||
"@aws-cdk/aws-lambda:recognizeLayerVersion": true, | ||
"@aws-cdk/core:checkSecretUsage": true, | ||
"@aws-cdk/core:target-partitions": [ | ||
"aws", | ||
"aws-cn" | ||
], | ||
"@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, | ||
"@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, | ||
"@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, | ||
"@aws-cdk/aws-iam:minimizePolicies": true, | ||
"@aws-cdk/core:validateSnapshotRemovalPolicy": true, | ||
"@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, | ||
"@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, | ||
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, | ||
"@aws-cdk/aws-apigateway:disableCloudWatchRole": true, | ||
"@aws-cdk/core:enablePartitionLiterals": true, | ||
"@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, | ||
"@aws-cdk/aws-iam:standardizedServicePrincipals": true, | ||
"@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, | ||
"@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, | ||
"@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, | ||
"@aws-cdk/aws-route53-patters:useCertificate": true, | ||
"@aws-cdk/customresources:installLatestAwsSdkDefault": false, | ||
"@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, | ||
"@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, | ||
"@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, | ||
"@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, | ||
"@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, | ||
"@aws-cdk/aws-redshift:columnId": true, | ||
"@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, | ||
"@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, | ||
"@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, | ||
"@aws-cdk/aws-kms:aliasNameRef": true, | ||
"@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, | ||
"@aws-cdk/core:includePrefixInUniqueNameGeneration": true, | ||
"@aws-cdk/aws-efs:denyAnonymousAccess": true, | ||
"@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, | ||
"@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, | ||
"@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, | ||
"@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, | ||
"@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, | ||
"@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, | ||
"@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true | ||
} | ||
} |
Oops, something went wrong.