Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #2327] Configure API/backend to work with OpenSearch #2450

Merged
merged 16 commits into from
Oct 15, 2024
Merged
6 changes: 2 additions & 4 deletions .github/workflows/cd-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- "main"
paths:
- "api/**"
- ".github/"
release:
types: [published]
workflow_dispatch:
Expand All @@ -22,9 +23,6 @@ on:
- prod

jobs:
api-checks:
name: Run API Checks
uses: ./.github/workflows/ci-api.yml

deploy:
name: Deploy
Expand All @@ -33,7 +31,7 @@ jobs:
strategy:
max-parallel: 1
matrix:
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || github.ref_name == 'main' && fromJSON('["dev", "staging"]') || fromJSON('["dev"]') }}
envs: ${{ fromJSON('["dev", "staging"]') }}
coilysiren marked this conversation as resolved.
Show resolved Hide resolved
with:
app_name: "api"
environment: ${{ matrix.envs }}
10 changes: 5 additions & 5 deletions api/local.env
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ HIDE_SQL_PARAMETER_LOGS=TRUE
# Opensearch Environment Variables
############################

OPENSEARCH_HOST=opensearch-node
OPENSEARCH_PORT=9200
OPENSEARCH_USE_SSL=FALSE
OPENSEARCH_VERIFY_CERTS=FALSE
SEARCH_ENDPOINT=opensearch-node
SEARCH_PORT=9200
SEARCH_USE_SSL=FALSE
SEARCH_VERIFY_CERTS=FALSE

############################
# AWS Defaults
Expand Down Expand Up @@ -126,4 +126,4 @@ IS_LOCAL_FOREIGN_TABLE=true
############################

# File path for the export_opportunity_data task
EXPORT_OPP_DATA_FILE_PATH=/tmp
EXPORT_OPP_DATA_FILE_PATH=/tmp
21 changes: 9 additions & 12 deletions api/src/adapters/search/opensearch_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from typing import Any, Generator, Iterable

import boto3
import opensearchpy

from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config
Expand Down Expand Up @@ -253,24 +252,22 @@ def scroll(


def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]:
# See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless
# See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-opensearch
# for further details on configuring the connection to OpenSearch

params = dict(
hosts=[{"host": opensearch_config.host, "port": opensearch_config.port}],
hosts=[{"host": opensearch_config.search_endpoint, "port": opensearch_config.search_port}],
http_compress=True,
use_ssl=opensearch_config.use_ssl,
verify_certs=opensearch_config.verify_certs,
use_ssl=opensearch_config.search_use_ssl,
verify_certs=opensearch_config.search_verify_certs,
connection_class=opensearchpy.RequestsHttpConnection,
pool_maxsize=opensearch_config.connection_pool_size,
pool_maxsize=opensearch_config.search_connection_pool_size,
)

# If an AWS region is set, we assume we're running non-locally
# and will attempt to authenticate with AOSS
if opensearch_config.aws_region is not None:
# If username and password are supplied (i.e. when running non-locally)
# we will add http_auth to the client connection
if opensearch_config.search_username and opensearch_config.search_password:
# Get credentials and authorize with AWS Opensearch Serverless (aoss)
credentials = boto3.Session().get_credentials()
auth = opensearchpy.AWSV4SignerAuth(credentials, opensearch_config.aws_region, "aoss")
auth = (opensearch_config.search_username, opensearch_config.search_password)
params["http_auth"] = auth

return params
29 changes: 13 additions & 16 deletions api/src/adapters/search/opensearch_config.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
import logging

from pydantic import Field
from pydantic_settings import SettingsConfigDict

from src.util.env_config import PydanticBaseEnvConfig

logger = logging.getLogger(__name__)


class OpensearchConfig(PydanticBaseEnvConfig):
model_config = SettingsConfigDict(env_prefix="OPENSEARCH_")

# TODO - hacky fix to get the API working again, host/port should
# be defined in terraform env vars
host: str = Field(default="NOT_DEFINED") # OPENSEARCH_HOST
port: int = Field(default=1) # OPENSEARCH_PORT
use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL
verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS
connection_pool_size: int = Field(default=10) # OPENSEARCH_CONNECTION_POOL_SIZE
search_endpoint: str = Field(default="NOT_DEFINED") # SEARCH_ENDPOINT
search_port: int = Field(default=443) # SEARCH_PORT

# AWS configuration
aws_region: str | None = Field(default=None) # OPENSEARCH_AWS_REGION
search_username: str | None = Field(default=None) # SEARCH_USERNAME
search_password: str | None = Field(default=None) # SEARCH_PASSWORD

search_use_ssl: bool = Field(default=True) # SEARCH_USE_SSL
search_verify_certs: bool = Field(default=True) # SEARCH_VERIFY_CERTS
search_connection_pool_size: int = Field(default=10) # SEARCH_CONNECTION_POOL_SIZE


def get_opensearch_config() -> OpensearchConfig:
Expand All @@ -29,11 +26,11 @@ def get_opensearch_config() -> OpensearchConfig:
logger.info(
"Constructed opensearch configuration",
extra={
"host": opensearch_config.host,
"port": opensearch_config.port,
"use_ssl": opensearch_config.use_ssl,
"verify_certs": opensearch_config.verify_certs,
"connection_pool_size": opensearch_config.connection_pool_size,
"search_endpoint": opensearch_config.search_endpoint,
"search_port": opensearch_config.search_port,
"search_use_ssl": opensearch_config.search_use_ssl,
"search_verify_certs": opensearch_config.search_verify_certs,
"search_connection_pool_size": opensearch_config.search_connection_pool_size,
},
)

Expand Down
1 change: 1 addition & 0 deletions api/src/search/backend/load_opportunities_to_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]:
Opportunity.is_draft.is_(False),
CurrentOpportunitySummary.opportunity_status.isnot(None),
)
.limit(50) # TODO - remove: caps this query at 50 rows; presumably a temporary testing limit
.options(selectinload("*"), noload(Opportunity.all_opportunity_summaries))
.execution_options(yield_per=5000)
)
Expand Down
8 changes: 5 additions & 3 deletions api/src/search/backend/load_search_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import src.adapters.db as db
import src.adapters.search as search
from src.adapters.db import flask_db
from src.adapters.search import flask_opensearch
from src.search.backend.load_opportunities_to_index import LoadOpportunitiesToIndex
from src.search.backend.load_search_data_blueprint import load_search_data_blueprint
from src.task.ecs_background_task import ecs_background_task
Expand All @@ -17,8 +18,9 @@
help="Whether to run a full refresh, or only incrementally update oppportunities",
)
@flask_db.with_db_session()
@flask_opensearch.with_search_client()
@ecs_background_task(task_name="load-opportunity-data-opensearch")
def load_opportunity_data(db_session: db.Session, full_refresh: bool) -> None:
search_client = search.SearchClient()

def load_opportunity_data(
search_client: search.SearchClient, db_session: db.Session, full_refresh: bool
) -> None:
LoadOpportunitiesToIndex(db_session, search_client, full_refresh).run()
2 changes: 1 addition & 1 deletion api/tests/src/adapters/search/test_opensearch_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_get_connection_parameters():

# Mostly validating defaults get used
assert params == {
"hosts": [{"host": config.host, "port": 9200}],
"hosts": [{"host": config.search_endpoint, "port": 9200}],
"http_compress": True,
"use_ssl": False,
"verify_certs": False,
Expand Down
Loading