Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a framework for end-to-end tests #1022

Merged
merged 2 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,28 @@ lazy val integtest = (project in file("integ-test"))
lazy val integration = taskKey[Unit]("Run integration tests")
lazy val awsIntegration = taskKey[Unit]("Run AWS integration tests")

// End-to-end test harness: runs SQL/PPL queries against the integ-test docker
// cluster. Depends on the packaged artifacts of the other modules so their
// assembled jars can be deployed to the containers (see the -D*Jar javaOptions).
lazy val e2etest = (project in file("e2e-test"))
  .dependsOn(flintCommons % "test->package", flintSparkIntegration % "test->package", pplSparkIntegration % "test->package", sparkSqlApplication % "test->package")
  .settings(
    commonSettings,
    name := "e2e-test",
    scalaVersion := scala212,
    libraryDependencies ++= Seq(
      "org.scalatest" %% "scalatest" % "3.2.15" % "test",
      "org.apache.spark" %% "spark-connect-client-jvm" % "3.5.3" % "test",
      "com.amazonaws" % "aws-java-sdk-s3" % "1.12.568" % "test",
      "com.softwaremill.sttp.client3" %% "core" % "3.10.2" % "test",
      // Scoped to "test" for consistency with the sibling e2e-only dependencies;
      // without the scope it leaked onto the compile classpath.
      "com.softwaremill.sttp.client3" %% "play2-json" % "3.10.2" % "test",
      "com.typesafe.play" %% "play-json" % "2.9.2" % "test",
    ),
    libraryDependencies ++= deps(sparkVersion),
    // Expose the assembled jar locations to the test JVM as system properties
    // so the suite can submit them to the cluster.
    javaOptions ++= Seq(
      s"-DappJar=${(sparkSqlApplication / assembly).value.getAbsolutePath}",
      s"-DextensionJar=${(flintSparkIntegration / assembly).value.getAbsolutePath}",
      s"-DpplJar=${(pplSparkIntegration / assembly).value.getAbsolutePath}",
    )
  )

lazy val standaloneCosmetic = project
.settings(
name := "opensearch-spark-standalone",
Expand Down
72 changes: 46 additions & 26 deletions docker/integ-test/configuration-updater/apply-configuration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,26 @@ curl -q \
-H 'Content-Type: application/json' \
-d '{"name": "integ-test", "versioning": {"enabled": true, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
http://minio-S3:9001/api/v1/buckets
# Create the access key
# Create the test-resources bucket
curl -q \
-b /tmp/minio-cookies.txt \
-X POST \
-H 'Content-Type: application/json' \
-d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
http://minio-S3:9001/api/v1/service-account-credentials
-d '{"name": "test-resources", "versioning": {"enabled": false, "excludePrefixes": [], "excludeFolders": false}, "locking": true}' \
http://minio-S3:9001/api/v1/buckets
# Create the access key (idempotent): only create the service account if one
# with this access key does not already exist.
# NOTE: --fail makes curl exit non-zero on HTTP errors (e.g. 404 when the key
# is missing); without it, $? is 0 for any completed request and the check
# below would never trigger. The trailing backslash after -X GET is required —
# without it the URL is parsed as a separate command.
curl -q --fail \
  -b /tmp/minio-cookies.txt \
  -X GET \
  "http://minio-S3:9001/api/v1/service-accounts/${S3_ACCESS_KEY}"
if [ "$?" -ne "0" ]; then
  curl -q \
    -b /tmp/minio-cookies.txt \
    -X POST \
    -H 'Content-Type: application/json' \
    -d "{\"policy\": \"\", \"accessKey\": \"${S3_ACCESS_KEY}\", \"secretKey\": \"${S3_SECRET_KEY}\", \"description\": \"\", \"comment\": \"\", \"name\": \"\", \"expiry\": null}" \
    http://minio-S3:9001/api/v1/service-account-credentials
fi

# Login to OpenSearch Dashboards
echo ">>> Login to OpenSearch dashboards"
Expand All @@ -43,31 +56,38 @@ if [ "$?" -eq "0" ]; then
else
echo " >>> Login failed"
fi

# Create the S3/Glue datasource (idempotent): skip creation if it already exists.
# --fail makes curl exit non-zero on HTTP errors so the existence check works;
# plain curl exits 0 even on a 404 response.
# NOTE(review): the GET previously targeted localhost:5601, but every other call
# in this script targets opensearch-dashboards:5601 — using the service hostname
# here for consistency; confirm this script does not run inside the dashboards
# container.
curl -q --fail \
  -b /tmp/opensearch-cookies.txt \
  -X GET \
  http://opensearch-dashboards:5601/api/directquery/dataconnections/mys3
if [ "$?" -ne "0" ]; then
  echo ">>> Creating datasource"
  curl -q \
    -b /tmp/opensearch-cookies.txt \
    -X POST \
    -H 'Content-Type: application/json' \
    -H 'Osd-Version: 2.18.0' \
    -H 'Osd-Xsrf: fetch' \
    -d "{\"name\": \"mys3\", \"allowedRoles\": [], \"connector\": \"s3glue\", \"properties\": {\"glue.auth.type\": \"iam_role\", \"glue.auth.role_arn\": \"arn:aws:iam::123456789012:role/S3Access\", \"glue.indexstore.opensearch.uri\": \"http://opensearch:9200\", \"glue.indexstore.opensearch.auth\": \"basicauth\", \"glue.indexstore.opensearch.auth.username\": \"admin\", \"glue.indexstore.opensearch.auth.password\": \"${OPENSEARCH_ADMIN_PASSWORD}\"}}" \
    http://opensearch-dashboards:5601/api/directquery/dataconnections
  if [ "$?" -eq "0" ]; then
    echo " >>> S3 datasource created"
  else
    echo " >>> Failed to create S3 datasource"
  fi

  # Configure the async-query execution engine; only needed on first-time setup,
  # hence nested inside the "datasource did not exist" branch.
  echo ">>> Setting cluster settings"
  curl -v \
    -u "admin:${OPENSEARCH_ADMIN_PASSWORD}" \
    -X PUT \
    -H 'Content-Type: application/json' \
    -d '{"persistent": {"plugins.query.executionengine.spark.config": "{\"applicationId\":\"integ-test\",\"executionRoleARN\":\"arn:aws:iam::xxxxx:role/emr-job-execution-role\",\"region\":\"us-west-2\", \"sparkSubmitParameters\": \"--conf spark.dynamicAllocation.enabled=false\"}"}}' \
    http://opensearch:9200/_cluster/settings
  if [ "$?" -eq "0" ]; then
    echo " >>> Successfully set cluster settings"
  else
    echo " >>> Failed to set cluster settings"
  fi
fi
5 changes: 2 additions & 3 deletions docker/integ-test/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,8 @@ services:
FLINT_JAR: ${FLINT_JAR}
PPL_JAR: ${PPL_JAR}
SQL_APP_JAR: ${SQL_APP_JAR}
depends_on:
metastore:
condition: service_completed_successfully
entrypoint: /bin/bash
command: exit

opensearch:
build: ./opensearch
Expand Down
124 changes: 124 additions & 0 deletions e2e-test/README.md
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks great
I'm missing some detailed review and call diagram sequence of how the E2E operates for each case:

  • SQL Queries for Spark Master
  • PPL Queries for Spark Master
  • SQL Queries for OpenSearch Async API
  • PPL Queries for OpenSearch Async API

What are the functions of each docker-compose service and which sequence the call each other

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some documentation with diagrams to explain how these queries are executed.

Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# End-to-End Tests

## Overview

The end-to-end tests start the integration test docker cluster and execute queries against it. Queries can be
sent to the Spark master or to the OpenSearch server (using the async query API).

The tests will run a query and compare the results to an expected results file.

There are four types of tests:
1. SQL queries sent to the Spark master
2. PPL queries sent to the Spark master
3. SQL queries sent to the OpenSearch server as an async query
4. PPL queries sent to the OpenSearch server as an async query

## Running the End-to-End Tests

The tests can be run using SBT:

```shell
sbt e2etest/test
```

## Test Structure

### SQL Queries for Spark Master

Create two files:
* `e2e-test/src/test/resources/spark/queries/sql/[NAME].sql`
* `e2e-test/src/test/resources/spark/queries/sql/[NAME].results`

The `*.sql` file contains only the SQL query on one line.

The `*.results` file contains the results in CSV format with a header (column names).

### PPL Queries for Spark Master

Create two files:
* `e2e-test/src/test/resources/spark/queries/ppl/[NAME].ppl`
* `e2e-test/src/test/resources/spark/queries/ppl/[NAME].results`

The `*.ppl` file contains only the PPL query on one line.

The `*.results` file contains the results in CSV format with a header (column names).

### SQL Queries for OpenSearch Async API

Create two files:
* `e2e-test/src/test/resources/opensearch/queries/sql/[NAME].sql`
* `e2e-test/src/test/resources/opensearch/queries/sql/[NAME].results`

The `*.sql` file contains only the SQL query on one line.

The `*.results` file contains the results in JSON format. The format is the exact output from the REST call
to get the async query results (`_plugins/_async_query/[QUERY_ID]`).

Results example:
```json
{
"status": "SUCCESS",
"schema": [
{
"name": "id",
"type": "integer"
},
{
"name": "name",
"type": "string"
}
],
"datarows": [
[
1,
"Foo"
],
[
2,
"Bar"
]
],
"total": 2,
"size": 2
}
```

### PPL Queries for OpenSearch Async API

Create two files:
* `e2e-test/src/test/resources/opensearch/queries/ppl/[NAME].ppl`
* `e2e-test/src/test/resources/opensearch/queries/ppl/[NAME].results`

The `*.ppl` file contains only the PPL query on one line.

The `*.results` file contains the results in JSON format. The format is the exact output from the REST call
to get the async query results (`_plugins/_async_query/[QUERY_ID]`).

Results example:
```json
{
"status": "SUCCESS",
"schema": [
{
"name": "id",
"type": "integer"
},
{
"name": "name",
"type": "string"
}
],
"datarows": [
[
1,
"Foo"
],
[
2,
"Bar"
]
],
"total": 2,
"size": 2
}
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"mappings": {
"properties": {
"c_custkey": {
"type": "integer"
},
"c_name": {
"type": "text"
},
"c_address": {
"type": "text"
},
"c_nationkey": {
"type": "integer"
},
"c_phone": {
"type": "text"
},
"c_acctbal": {
"type": "double"
},
"c_mktsegment": {
"type": "text"
},
"c_comment": {
"type": "text"
}
}
}
}
12 changes: 12 additions & 0 deletions e2e-test/src/test/resources/opensearch/indices/http_logs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696154400000, "year": 2023, "month": 10, "day": 1, "clientip": "40.135.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696154700000, "year": 2023, "month": 10, "day": 1, "clientip": "232.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696155000000, "year": 2023, "month": 10, "day": 1, "clientip": "26.1.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696155300000, "year": 2023, "month": 10, "day": 1, "clientip": "247.37.0.0", "request": "GET /french/splash_inet.html HTTP/1.0", "status": 200, "size": 3781}
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696155600000, "year": 2023, "month": 10, "day": 1, "clientip": "247.37.0.0", "request": "GET /images/hm_nbg.jpg HTTP/1.0", "status": 304, "size": 0}
{"index": {"_index": "http_logs"}}
{"@timestamp": 1696155900000, "year": 2023, "month": 10, "day": 1, "clientip": "252.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"mappings": {
"properties": {
"@timestamp": {
"type": "date"
},
"year": {
"type": "integer"
},
"month": {
"type": "integer"
},
"day": {
"type": "integer"
},
"clientip": {
"type": "keyword"
},
"request": {
"type": "text"
},
"status": {
"type": "integer"
},
"size": {
"type": "integer"
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"mappings": {
"properties": {
"l_orderkey": {
"type": "integer"
},
"l_partkey": {
"type": "text"
},
"l_suppkey": {
"type": "integer"
},
"l_linenumber": {
"type": "integer"
},
"l_quantity": {
"type": "double"
},
"l_extendedprice": {
"type": "double"
},
"l_discount": {
"type": "double"
},
"l_tax": {
"type": "double"
},
"l_returnflag": {
"type": "text"
},
"l_linestatus": {
"type": "text"
},
"l_shipdate": {
"type": "date"
},
"l_commitdate": {
"type": "date"
},
"l_receiptdate": {
"type": "date"
},
"l_shipinstruct": {
"type": "text"
},
"l_shipmode": {
"type": "text"
},
"l_comment": {
"type": "text"
}
}
}
}
Loading
Loading