diff --git a/justfile b/justfile index dac5cc30..a1d47fc7 100644 --- a/justfile +++ b/justfile @@ -1,3 +1,4 @@ +# Lists all targets default: just --list @@ -9,14 +10,48 @@ docker +args: docker compose -f dev.docker-compose.yaml down exit $exit_code +# Builds in docker ci-build: just docker cargo build +# Runs the tests in docker ci-test: just docker cargo test +# Runs linting checks in docker ci-lint: just docker cargo clippy +# Runs benchmarks in docker ci-bench: just docker cargo bench + +# Runs the tests +test *ARGS: + #!/usr/bin/env bash + if command -v cargo-nextest; then + COMMAND=(cargo nextest run) + else + COMMAND=(cargo test) + fi + COMMAND+=(--no-fail-fast "$@") + echo "${COMMAND[*]}" + "${COMMAND[@]}" + +# Formats all the Markdown, Rust, Nix etc +fix-format: fix-format-prettier + cargo fmt --all + ! command -v nix || nix fmt + +# Formats Markdown, etc with prettier +fix-format-prettier: + npx --yes prettier --write . + +# Runs the tests and updates all goldenfiles with the test output +update-golden-files: + UPDATE_GOLDENFILES=1 just test + just fix-format-prettier + +# Starts the ndc-spec documentation webserver +start-docs: + cd specification && mdbook serve diff --git a/ndc-models/src/lib.rs b/ndc-models/src/lib.rs index aec6e089..68f4b64c 100644 --- a/ndc-models/src/lib.rs +++ b/ndc-models/src/lib.rs @@ -95,7 +95,7 @@ pub struct ExistsCapabilities { #[schemars(title = "Nested Field Capabilities")] pub struct NestedFieldCapabilities { /// Does the connector support filtering by values of nested fields - pub filter_by: Option, + pub filter_by: Option, /// Does the connector support ordering by values of nested fields pub order_by: Option, /// Does the connector support aggregating values within nested fields @@ -104,7 +104,35 @@ pub struct NestedFieldCapabilities { /// `NestedField::NestedCollection` pub nested_collections: Option, } -// ANCHOR_END: NestedCollectionCapabilities +// ANCHOR_END: NestedFieldCapabilities + +// ANCHOR: 
NestedFieldFilterByCapabilities +#[skip_serializing_none] +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)] +#[schemars(title = "Nested Field Filter By Capabilities")] +pub struct NestedFieldFilterByCapabilities { + /// Does the connector support filtering over nested arrays + pub nested_arrays: Option, +} +// ANCHOR_END: NestedFieldFilterByCapabilities + +// ANCHOR: NestedArrayFilterByCapabilities +#[skip_serializing_none] +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)] +#[schemars(title = "Nested Array Filter By Capabilities")] +pub struct NestedArrayFilterByCapabilities { + /// Does the connector support filtering over nested arrays using existential quantification. + /// This means the connector must support ExistsInCollection::NestedScalarCollection. + pub exists: Option, + /// Does the connector support filtering over nested arrays by checking if the array contains a value. + /// This must be supported for all types that can be contained in an array that implement an 'eq' + /// comparison operator. + pub contains: Option, + /// Does the connector support filtering over nested arrays by checking if the array is empty. + /// This must be supported no matter what type is contained in the array. 
+ pub is_empty: Option, +} +// ANCHOR_END: NestedArrayFilterByCapabilities // ANCHOR: AggregateCapabilities #[skip_serializing_none] @@ -817,6 +845,10 @@ pub enum Expression { operator: ComparisonOperatorName, value: ComparisonValue, }, + ArrayComparison { + column: ComparisonTarget, + comparison: ArrayComparison, + }, Exists { in_collection: ExistsInCollection, predicate: Option>, @@ -824,6 +856,18 @@ pub enum Expression { } // ANCHOR_END: Expression +// ANCHOR: ArrayComparison +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[schemars(title = "Array Comparison")] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ArrayComparison { + /// Check if the array contains the specified value + Contains { value: ComparisonValue }, + /// Check is the array is empty + IsEmpty, +} +// ANCHOR_END: ArrayComparison + // ANCHOR: UnaryComparisonOperator #[derive( Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, JsonSchema, @@ -931,6 +975,18 @@ pub enum ExistsInCollection { #[serde(skip_serializing_if = "Vec::is_empty", default)] field_path: Vec, }, + /// Specifies a column that contains a nested array of scalars. The + /// array will be brought into scope of the nested expression where + /// each element becomes an object with one '__value' column that + /// contains the element value. 
+ NestedScalarCollection { + column_name: FieldName, + #[serde(skip_serializing_if = "BTreeMap::is_empty", default)] + arguments: BTreeMap, + /// Path to a nested collection via object columns + #[serde(skip_serializing_if = "Vec::is_empty", default)] + field_path: Vec, + }, } // ANCHOR_END: ExistsInCollection diff --git a/ndc-models/tests/json_schema/capabilities_response.jsonschema b/ndc-models/tests/json_schema/capabilities_response.jsonschema index cd893ef2..512e7b6d 100644 --- a/ndc-models/tests/json_schema/capabilities_response.jsonschema +++ b/ndc-models/tests/json_schema/capabilities_response.jsonschema @@ -178,7 +178,7 @@ "description": "Does the connector support filtering by values of nested fields", "anyOf": [ { - "$ref": "#/definitions/LeafCapability" + "$ref": "#/definitions/NestedFieldFilterByCapabilities" }, { "type": "null" @@ -220,6 +220,62 @@ } } }, + "NestedFieldFilterByCapabilities": { + "title": "Nested Field Filter By Capabilities", + "type": "object", + "properties": { + "nested_arrays": { + "description": "Does the connector support filtering over nested arrays", + "anyOf": [ + { + "$ref": "#/definitions/NestedArrayFilterByCapabilities" + }, + { + "type": "null" + } + ] + } + } + }, + "NestedArrayFilterByCapabilities": { + "title": "Nested Array Filter By Capabilities", + "type": "object", + "properties": { + "exists": { + "description": "Does the connector support filtering over nested arrays using existential quantification. This means the connector must support ExistsInCollection::NestedScalarCollection.", + "anyOf": [ + { + "$ref": "#/definitions/LeafCapability" + }, + { + "type": "null" + } + ] + }, + "contains": { + "description": "Does the connector support filtering over nested arrays by checking if the array contains a value. 
This must be supported for all types that can be contained in an array that implement an 'eq' comparison operator.", + "anyOf": [ + { + "$ref": "#/definitions/LeafCapability" + }, + { + "type": "null" + } + ] + }, + "is_empty": { + "description": "Does the connector support filtering over nested arrays by checking if the array is empty. This must be supported no matter what type is contained in the array.", + "anyOf": [ + { + "$ref": "#/definitions/LeafCapability" + }, + { + "type": "null" + } + ] + } + } + }, "ExistsCapabilities": { "title": "Exists Capabilities", "type": "object", diff --git a/ndc-models/tests/json_schema/mutation_request.jsonschema b/ndc-models/tests/json_schema/mutation_request.jsonschema index aa81b050..05b008e7 100644 --- a/ndc-models/tests/json_schema/mutation_request.jsonschema +++ b/ndc-models/tests/json_schema/mutation_request.jsonschema @@ -741,6 +741,28 @@ } } }, + { + "type": "object", + "required": [ + "column", + "comparison", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "array_comparison" + ] + }, + "column": { + "$ref": "#/definitions/ComparisonTarget" + }, + "comparison": { + "$ref": "#/definitions/ArrayComparison" + } + } + }, { "type": "object", "required": [ @@ -942,6 +964,45 @@ } ] }, + "ArrayComparison": { + "title": "Array Comparison", + "oneOf": [ + { + "description": "Check if the array contains the specified value", + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "contains" + ] + }, + "value": { + "$ref": "#/definitions/ComparisonValue" + } + } + }, + { + "description": "Check is the array is empty", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "is_empty" + ] + } + } + } + ] + }, "ExistsInCollection": { "title": "Exists In Collection", "oneOf": [ @@ -1028,6 +1089,38 @@ } } } + }, + { + "description": "Specifies a column that contains a nested 
array of scalars. The array will be brought into scope of the nested expression where each element becomes an object with one '__value' column that contains the element value.", + "type": "object", + "required": [ + "column_name", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nested_scalar_collection" + ] + }, + "column_name": { + "type": "string" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Argument" + } + }, + "field_path": { + "description": "Path to a nested collection via object columns", + "type": "array", + "items": { + "type": "string" + } + } + } } ] }, diff --git a/ndc-models/tests/json_schema/query_request.jsonschema b/ndc-models/tests/json_schema/query_request.jsonschema index eab3047b..39870a0c 100644 --- a/ndc-models/tests/json_schema/query_request.jsonschema +++ b/ndc-models/tests/json_schema/query_request.jsonschema @@ -726,6 +726,28 @@ } } }, + { + "type": "object", + "required": [ + "column", + "comparison", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "array_comparison" + ] + }, + "column": { + "$ref": "#/definitions/ComparisonTarget" + }, + "comparison": { + "$ref": "#/definitions/ArrayComparison" + } + } + }, { "type": "object", "required": [ @@ -927,6 +949,45 @@ } ] }, + "ArrayComparison": { + "title": "Array Comparison", + "oneOf": [ + { + "description": "Check if the array contains the specified value", + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "contains" + ] + }, + "value": { + "$ref": "#/definitions/ComparisonValue" + } + } + }, + { + "description": "Check is the array is empty", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "is_empty" + ] + } + } + } + ] + }, "ExistsInCollection": { "title": "Exists In Collection", "oneOf": [ @@ -1013,6 +1074,38 @@ } } } + }, + { + 
"description": "Specifies a column that contains a nested array of scalars. The array will be brought into scope of the nested expression where each element becomes an object with one '__value' column that contains the element value.", + "type": "object", + "required": [ + "column_name", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nested_scalar_collection" + ] + }, + "column_name": { + "type": "string" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Argument" + } + }, + "field_path": { + "description": "Path to a nested collection via object columns", + "type": "array", + "items": { + "type": "string" + } + } + } } ] }, diff --git a/ndc-reference/bin/reference/main.rs b/ndc-reference/bin/reference/main.rs index 9e59aa14..db042d12 100644 --- a/ndc-reference/bin/reference/main.rs +++ b/ndc-reference/bin/reference/main.rs @@ -16,6 +16,7 @@ use axum::{ }; use indexmap::IndexMap; use itertools::Itertools; +use models::FieldName; use ndc_models::{self as models}; use prometheus::{Encoder, IntCounter, IntGauge, Opts, Registry, TextEncoder}; use regex::Regex; @@ -290,7 +291,13 @@ async fn get_capabilities() -> Json { }, explain: None, nested_fields: models::NestedFieldCapabilities { - filter_by: Some(models::LeafCapability {}), + filter_by: Some(models::NestedFieldFilterByCapabilities { + nested_arrays: Some(models::NestedArrayFilterByCapabilities { + exists: Some(models::LeafCapability {}), + contains: Some(models::LeafCapability {}), + is_empty: Some(models::LeafCapability {}), + }), + }), order_by: Some(models::LeafCapability {}), aggregates: Some(models::LeafCapability {}), nested_collections: Some(models::LeafCapability {}), @@ -2048,6 +2055,21 @@ fn eval_expression( eval_comparison_operator(operator, &left_val, &right_vals) } // ANCHOR_END: eval_expression_binary_operators + // ANCHOR: eval_expression_array_comparison + models::Expression::ArrayComparison { column, comparison } => { + 
let left_val = + eval_comparison_target(collection_relationships, variables, state, column, item)?; + eval_array_comparison( + collection_relationships, + variables, + &left_val, + comparison, + state, + scopes, + item, + ) + } + // ANCHOR_END: eval_expression_array_comparison // ANCHOR: eval_expression_exists models::Expression::Exists { in_collection, @@ -2085,7 +2107,7 @@ fn eval_expression( }), ))?; Ok(!rows.is_empty()) - } // ANCHOR_END: eval_expression_exists + } // ANCHOR_END: eval_expression_exists, } } // ANCHOR_END: eval_expression @@ -2196,6 +2218,49 @@ fn eval_comparison_operator( } } // ANCHOR_END: eval_comparison_operator +// ANCHOR: eval_array_comparison +fn eval_array_comparison( + collection_relationships: &BTreeMap, + variables: &BTreeMap, + left_val: &serde_json::Value, + comparison: &models::ArrayComparison, + state: &AppState, + scopes: &[&BTreeMap], + item: &BTreeMap, +) -> Result { + let left_val_array = left_val.as_array().ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(models::ErrorResponse { + message: "column used in array comparison is not an array".into(), + details: serde_json::Value::Null, + }), + ) + })?; + + match comparison { + models::ArrayComparison::Contains { value } => { + let right_vals = eval_comparison_value( + collection_relationships, + variables, + value, + state, + scopes, + item, + )?; + + for right_val in right_vals { + if left_val_array.contains(&right_val) { + return Ok(true); + } + } + + Ok(false) + } + models::ArrayComparison::IsEmpty => Ok(left_val_array.is_empty()), + } +} +// ANCHOR_END: eval_array_comparison // ANCHOR: eval_in_collection fn eval_in_collection( collection_relationships: &BTreeMap, @@ -2255,6 +2320,28 @@ fn eval_in_collection( ) }) } + models::ExistsInCollection::NestedScalarCollection { + field_path, + column_name, + arguments, + } => { + let value = + eval_column_field_path(variables, item, column_name, Some(field_path), arguments)?; + let value_array = value.as_array().ok_or_else(|| { + 
( + StatusCode::BAD_REQUEST, + Json(models::ErrorResponse { + message: "nested scalar collection column value must be an array".into(), + details: serde_json::Value::Null, + }), + ) + })?; + let wrapped_array_values = value_array + .iter() + .map(|v| BTreeMap::from([(FieldName::from("__value"), v.clone())])) + .collect(); + Ok(wrapped_array_values) + } } } // ANCHOR_END: eval_in_collection diff --git a/ndc-reference/countries.jsonl b/ndc-reference/countries.jsonl index e373f2f6..e1423592 100644 --- a/ndc-reference/countries.jsonl +++ b/ndc-reference/countries.jsonl @@ -1,3 +1,4 @@ {"id": 1, "name": "UK", "area_km2": 244376, "cities": [{"name": "London"}, {"name": "Birmingham"}, {"name": "Manchester"}, {"name": "Glasgow"}, {"name": "Liverpool"}, {"name": "Bristol"}, {"name": "Edinburgh"}, {"name": "Leeds"}, {"name": "Sheffield"}, {"name": "Newcastle"}, {"name": "Nottingham"}, {"name": "Cardiff"}, {"name": "Belfast"}, {"name": "Leicester"}, {"name": "Coventry"}, {"name": "Sunderland"}, {"name": "Brighton"}, {"name": "Hull"}, {"name": "Plymouth"}, {"name": "Derby"}]} {"id": 2, "name": "Sweden", "area_km2": 450295, "cities": [{"name": "Stockholm"}, {"name": "Gothenburg"}, {"name": "Malmö"}, {"name": "Uppsala"}, {"name": "Västerås"}, {"name": "Örebro"}, {"name": "Linköping"}, {"name": "Helsingborg"}]} {"id": 3, "name": "Australia", "area_km2": 7688287, "cities": [{"name": "Melbourne"}, {"name": "Sydney"}, {"name": "Brisbane"}, {"name": "Adelaide"}, {"name": "Canberra"}, {"name": "Perth"}, {"name": "Darwin"}, {"name": "Hobart"}]} +{"id": 4, "name": "Mars", "area_km2": 144798500, "cities": []} diff --git a/ndc-reference/tests/capabilities/expected.json b/ndc-reference/tests/capabilities/expected.json index 32bb340a..12b6efef 100644 --- a/ndc-reference/tests/capabilities/expected.json +++ b/ndc-reference/tests/capabilities/expected.json @@ -12,7 +12,13 @@ }, "variables": {}, "nested_fields": { - "filter_by": {}, + "filter_by": { + "nested_arrays": { + "exists": {}, + 
"contains": {}, + "is_empty": {} + } + }, "order_by": {}, "aggregates": {}, "nested_collections": {} diff --git a/ndc-reference/tests/query/predicate_with_array_contains/expected.json b/ndc-reference/tests/query/predicate_with_array_contains/expected.json new file mode 100644 index 00000000..5da9fe75 --- /dev/null +++ b/ndc-reference/tests/query/predicate_with_array_contains/expected.json @@ -0,0 +1,16 @@ +[ + { + "rows": [ + { + "id": 2, + "name": "Chalmers University of Technology", + "location": { + "campuses": [ + "Johanneberg", + "Lindholmen" + ] + } + } + ] + } +] \ No newline at end of file diff --git a/ndc-reference/tests/query/predicate_with_array_contains/request.json b/ndc-reference/tests/query/predicate_with_array_contains/request.json new file mode 100644 index 00000000..889a266c --- /dev/null +++ b/ndc-reference/tests/query/predicate_with_array_contains/request.json @@ -0,0 +1,52 @@ +{ + "$schema": "../../../../ndc-models/tests/json_schema/query_request.jsonschema", + "collection": "institutions", + "arguments": {}, + "query": { + "fields": { + "id": { + "type": "column", + "column": "id" + }, + "name": { + "type": "column", + "column": "name" + }, + "location": { + "type": "column", + "column": "location", + "fields": { + "type": "object", + "fields": { + "campuses": { + "type": "column", + "column": "campuses", + "arguments": { + "limit": { + "type": "literal", + "value": null + } + } + } + } + } + } + }, + "predicate": { + "type": "array_comparison", + "column": { + "type": "column", + "name": "location", + "field_path": ["campuses"] + }, + "comparison": { + "type": "contains", + "value": { + "type": "scalar", + "value": "Lindholmen" + } + } + } + }, + "collection_relationships": {} +} diff --git a/ndc-reference/tests/query/predicate_with_array_is_empty/expected.json b/ndc-reference/tests/query/predicate_with_array_is_empty/expected.json new file mode 100644 index 00000000..91397f83 --- /dev/null +++ 
b/ndc-reference/tests/query/predicate_with_array_is_empty/expected.json @@ -0,0 +1,11 @@ +[ + { + "rows": [ + { + "id": 4, + "name": "Mars", + "cities": [] + } + ] + } +] \ No newline at end of file diff --git a/ndc-reference/tests/query/predicate_with_array_is_empty/request.json b/ndc-reference/tests/query/predicate_with_array_is_empty/request.json new file mode 100644 index 00000000..72d39475 --- /dev/null +++ b/ndc-reference/tests/query/predicate_with_array_is_empty/request.json @@ -0,0 +1,44 @@ +{ + "$schema": "../../../../ndc-models/tests/json_schema/query_request.jsonschema", + "collection": "countries", + "arguments": {}, + "query": { + "fields": { + "id": { + "type": "column", + "column": "id" + }, + "name": { + "type": "column", + "column": "name" + }, + "cities": { + "type": "column", + "column": "cities", + "arguments": { + "limit": { + "type": "literal", + "value": null + } + } + } + }, + "predicate": { + "type": "array_comparison", + "column": { + "type": "column", + "name": "cities", + "arguments": { + "limit": { + "type": "literal", + "value": null + } + } + }, + "comparison": { + "type": "is_empty" + } + } + }, + "collection_relationships": {} +} diff --git a/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/expected.json b/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/expected.json new file mode 100644 index 00000000..b0453c02 --- /dev/null +++ b/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/expected.json @@ -0,0 +1,28 @@ +[ + { + "rows": [ + { + "id": 1, + "name": "Queen Mary University of London", + "location": { + "campuses": [ + "Mile End", + "Whitechapel", + "Charterhouse Square", + "West Smithfield" + ] + } + }, + { + "id": 2, + "name": "Chalmers University of Technology", + "location": { + "campuses": [ + "Johanneberg", + "Lindholmen" + ] + } + } + ] + } +] \ No newline at end of file diff --git 
a/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/request.json b/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/request.json new file mode 100644 index 00000000..6e9adca0 --- /dev/null +++ b/ndc-reference/tests/query/predicate_with_exists_in_nested_scalar_collection/request.json @@ -0,0 +1,58 @@ +{ + "$schema": "../../../../ndc-models/tests/json_schema/query_request.jsonschema", + "collection": "institutions", + "arguments": {}, + "query": { + "fields": { + "id": { + "type": "column", + "column": "id" + }, + "name": { + "type": "column", + "column": "name" + }, + "location": { + "type": "column", + "column": "location", + "fields": { + "type": "object", + "fields": { + "campuses": { + "type": "column", + "column": "campuses", + "arguments": { + "limit": { + "type": "literal", + "value": null + } + } + } + } + } + } + }, + "predicate": { + "type": "exists", + "in_collection": { + "type": "nested_scalar_collection", + "column_name": "location", + "field_path": ["campuses"], + "arguments": {} + }, + "predicate": { + "type": "binary_comparison_operator", + "column": { + "type": "column", + "name": "__value" + }, + "operator": "like", + "value": { + "type": "scalar", + "value": "d" + } + } + } + }, + "collection_relationships": {} +} diff --git a/rfcs/0023-filtering-over-nested-array-of-scalars.md b/rfcs/0023-filtering-over-nested-array-of-scalars.md new file mode 100644 index 00000000..545c4049 --- /dev/null +++ b/rfcs/0023-filtering-over-nested-array-of-scalars.md @@ -0,0 +1,524 @@ +# Filtering over nested arrays of scalars + +## Purpose + +Right now, if you have an object type defined that has a field that is an array of a scalar type, there is no way to filter a collection of the object type by whether or not something exists inside that field's array of scalar type. 
+ +Imagine we have a `Users` collection of `User` object type: + +```yaml +User: + fields: + id: + type: + type: named + name: Int + name: + type: + type: named + name: String + roles: + type: + type: array + element_type: + type: named + name: String +``` + +An example object might be: + +```yaml +id: 1 +name: Daniel +roles: ["admin", "user"] +``` + +We might want to issue a query where we filter the `Users` collection so we only return `User` objects where `roles` contains the string `"admin"`. We might try to do this using the following NDC query: + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: binary_comparison_operator + column: + type: column + name: roles + path: [] + operator: eq + value: + type: scalar + value: admin +arguments: {} +collection_relationships: {} +``` + +However, this would currently be illegal since the type of the `roles` column (array of `String`) does not match the value's type (`String`). + +## Proposal + +We could add another variant to `ExistsInCollection` that brings a nested array of scalars into scope of the nested expression, where each element becomes an object with one `__value` column that contains the element value. Then, the usual `Expression::BinaryComparisonOperator`, etc operations could be used over that virtual column. + +```rust +pub enum ExistsInCollection { + ... 
+ NestedScalarCollection { + column_name: FieldName, + arguments: BTreeMap, + /// Path to a nested collection via object columns + field_path: Vec, + } +} +``` + +Given this, the query from above could be written like so: + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: exists + in_collection: + type: nested_scalar_collection + column_name: roles + predicate: + type: binary_comparison_operator + column: + type: column + name: __value + path: [] + operator: eq + value: + type: scalar + value: admin +arguments: {} +collection_relationships: {} +``` + +We could also support comparisons against the whole array value directly by adding a new `Expression` variant: + +```rust +pub enum Expression { + ... + ArrayComparison { + column: ComparisonTarget, + comparison: ArrayComparison, + }, +} +``` + +The `ArrayComparison` type would then capture the different types of comparisons one could do against the array: + +```rust +pub enum ArrayComparison { + /// Check if the array contains the specified value + Contains { + value: ComparisonValue, + }, + /// Check if the array is empty + IsEmpty, +} +``` + +Whether or not these new array comparisons would be supported by the connector would be declared in the capabilities: + +```jsonc +{ + "query": { + "aggregates": {}, + "variables": {}, + "nested_fields": { + "filter_by": { + // NEW!! + // Does the connector support filtering over nested arrays + "nested_arrays": { + // Does the connector support filtering over nested arrays using existential quantification. + // This means the connector must support ExistsInCollection::NestedScalarCollection. + "exists": {}, + // Does the connector support filtering over nested arrays by checking if the array contains a value. + // This must be supported for all types that can be contained in an array that implement an 'eq' + // comparison operator. 
+ "contains": {}, + // Does the connector support filtering over nested arrays by checking if the array is empty. + // This must be supported no matter what type is contained in the array. + "is_empty": {}, + }, + }, + "order_by": {}, + "aggregates": {}, + }, + "exists": { + "nested_collections": {}, + }, + }, + "mutation": {}, + "relationships": { + "relation_comparisons": {}, + "order_by_aggregate": {}, + }, +} +``` + +## Alternative Proposals + +### Implicit existential quantification + +We could update the definition of `ComparisonTarget::Column` to specify that if the targeted column is an array of scalars, then the comparison operator should be considered to be existentially quantified over all elements in the array. In simpler terms, at least one element in the array of scalars must match the specified comparison. + +This behaviour for `ComparisonTarget::Column` is new, and as such would need to be gated behind a new capability so that existing connectors would not receive queries expecting this behaviour. + +```json +{ + "query": { + "aggregates": {}, + "variables": {}, + "nested_fields": { + "filter_by": { + "scalar_arrays": {} // NEW!! + }, + "order_by": {}, + "aggregates": {} + }, + "exists": { + "nested_collections": {} + } + }, + "mutation": {}, + "relationships": { + "relation_comparisons": {}, + "order_by_aggregate": {} + } +} +``` + +#### Issues + +##### Implicit existential quantification + +This new interpretation of the query structure is implicit, which is suboptimal as it may be non-obvious to connector authors that this is how things are supposed to work. It is better to be explicit with such things. + +It also disallows direct comparison of a complex type to a literal value of that complex type (something that isn't supported right now, anyway). 
For example, this is now inexpressible due to the implicit existential quantification: + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: binary_comparison_operator + column: + type: column + name: roles + path: [] + operator: eq + value: + type: scalar + value: ["admin", "users"] # The roles must be exactly admin and users, in that order +arguments: {} +collection_relationships: {} +``` + +A way that _explicit_ existential quantification could be represented could be to add a new variant to `ComparisonTarget`, `ExistsInColumn`: + +```rust +pub enum ComparisonTarget { + Column { + /// The name of the column + name: FieldName, + /// Path to a nested field within an object column + field_path: Option>, + }, + ExistsInColumn { + /// The name of the column + name: FieldName, + /// Path to a nested field within an object column + field_path: Option>, + }, + Aggregate { + /// The aggregation method to use + aggregate: Aggregate, + /// Non-empty collection of relationships to traverse + path: Vec, + }, +} +``` + +Then you could write a query more explicitly like so: + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: binary_comparison_operator + column: + type: exists_in_column # New! + name: roles + path: [] + operator: eq + value: + type: scalar + value: admin +arguments: {} +collection_relationships: {} +``` + +The use of `ComparisonTarget::ExistsInColumn` would be gated behind the proposed capability. + +The issue with this is that it requires more work to support, as more extensive changes are required to v3-engine so that it uses this new `ComparisonTarget`. + +##### How about existential quantification over arrays of nested objects? 
+ +What about if we had the following `User` and `Role` object types: + +```yaml +User: + fields: + id: + type: + type: named + name: Int + name: + type: + type: named + name: String + roles: + type: + type: array + element_type: + type: named + name: Role + +Role: + fields: + name: + type: + type: named + name: String + assignedAt: + type: + type: named + name: DateTime +``` + +An example object might be: + +```yaml +id: 1 +name: Daniel +roles: + - name: admin + assignedAt: 2024-09-25T14:51:00Z + - name: user + assignedAt: 2024-09-25T12:14:00Z +``` + +Could we write a query that filtered by the `name` property in the nested array of `Role` object types like so, thanks to the implicit existential quantification? + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: binary_comparison_operator + column: + type: column + name: roles + field_path: [name] # Navigate into the name property of the Role object + path: [] + operator: eq + value: + type: scalar + value: admin +arguments: {} +collection_relationships: {} +``` + +This is inadvisable to allow, and such a query can already be expressed using explicit nested collection `Expression::Exists` queries, like so: + +```yaml +collection: Users +query: + fields: + id: + type: column + column: id + predicate: + type: exists + in_collection: + type: nested_collection + column: roles + predicate: + type: binary_comparison_operator + column: + type: column + name: name + path: [] + operator: eq + value: + type: scalar + value: admin +arguments: {} +collection_relationships: {} +``` + +We should state that the existential quantification only works when the _end-point_ of the `ComparisonTarget::Column` is targeting an array of scalars. `field_path` can only be used to navigate nested objects. 
+ +### Expression::ArrayComparison with exists support nested inside + +We could add another variant to Expression to represent a comparison against an array type: + +```rust +pub enum Expression { + ... + ArrayComparison { + column: ComparisonTarget, + comparison: ArrayComparison, + }, +} +``` + +The `ArrayComparison` type would then capture the different types of comparisons one could do against the array: + +```rust +pub enum ArrayComparison { + /// Perform a binary comparison operation against the elements of the array. + /// The comparison is asserting that there must exist at least one element + /// in the array that the comparison succeeds for + ExistsBinary { + operator: ComparisonOperatorName, + value: ComparisonValue, + }, + /// Perform a unary comparison operation against the elements of the array. + /// The comparison is asserting that there must exist at least one element + /// in the array that the comparison succeeds for + ExistsUnary { + operator: UnaryComparisonOperator + }, + /// Nest a comparison through one level of a nested array, asserting that + /// there must exist at least one element in the outer array that matches + /// the comparison applied to the inner array + ExistsInNestedArray { + nested_comparison: Box + }, + /// Check if the array contains the specified value + Contains { + value: ComparisonValue, + }, + /// Check if the array is empty + IsEmpty, +} +``` + +Whether or not these new array comparisons would be supported by the connector would be declared in the capabilities: + +```jsonc +{ + "query": { + "aggregates": {}, + "variables": {}, + "nested_fields": { + "filter_by": { + // NEW!! + // Does the connector support filtering over nested arrays + "nested_arrays": { + // Does the connector support filtering over nested arrays using existential quantification. + // This must be supported for all types that can be contained in an array that have a comparison operator. 
+ "exists": { + // Does the connector support filtering over nested arrays of arrays using existential quantification + "nested": {}, + }, + // Does the connector support filtering over nested arrays by checking if the array contains a value. + // This must be supported for all types that can be contained in an array. + "contains": {}, + // Does the connector support filtering over nested arrays by checking if the array is empty. + // This must be supported no matter what type is contained in the array. + "isEmpty": {}, + }, + }, + "order_by": {}, + "aggregates": {}, + }, + "exists": { + "nested_collections": {}, + }, + }, + "mutation": {}, + "relationships": { + "relation_comparisons": {}, + "order_by_aggregate": {}, + }, +} +``` + +#### Issues + +This approach doesn't allow use of logical operators beyond the new `ArrayComparison` boundary. So, for example, if the following data existed: + +``` +[ + Customer { + nested_numbers: [ [2,1], [1,0] ] + }, + Customer { + nested_numbers: [ [2,3], [1,0] ] + } +] +``` + +and we wanted to ask the following question: + +> give me all customers where there exists at least one inner array element that is greater than 1 and also less than 3. + +```graphql +query { + Customer( + where: { nested_numbers: { inner: { _and: [{ _gt: 1 }, { _lt: 3 }] } } } + ) { + id + } +} +``` + +We couldn't because there's no way to nest logical operators inside a `ArrayComparison::ExistsBinary`. + +```yaml +collection: customers +query: + fields: + id: + type: column + column: id + predicate: + type: array_comparison + column: + type: column + name: nested_numbers + path: [] + comparison: + type: exists_in_nested_array + nested_comparison: + type: exists_binary # Nowhere to nest an AND inside this exists_binary + operator: gt + value: + type: literal + value: 1 + +arguments: {} +collection_relationships: {} +```