From 1ff50d3d4d31e6944ae16f7eba6ce3c7ed6aa313 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Wed, 10 Apr 2024 13:39:18 -0400 Subject: [PATCH 1/8] Debug is much nicer experience for template generation failures. --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5e8d6c2f..8f0833f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,11 +11,12 @@ COPY data /build/data COPY src /build/src COPY templates /build/templates COPY tests build/tests -RUN cargo build --release +# Don't build release, so we get template debugging output. +RUN cargo build # The runtime image FROM alpine:3.18.3 LABEL maintainer="The OpenTelemetry Authors" WORKDIR /weaver -COPY --from=weaver-build /build/target/release/weaver /weaver/weaver +COPY --from=weaver-build /build/target/debug/weaver /weaver/weaver ENTRYPOINT ["/weaver/weaver"] \ No newline at end of file From 6900d592af9eccc7ae850239f5c904efbfd18165 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Wed, 10 Apr 2024 13:39:39 -0400 Subject: [PATCH 2/8] Add helper for dealing with separated ids in semconv. --- crates/weaver_forge/src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 69723cc6..4c43cdcc 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -356,6 +356,7 @@ impl TemplateEngine { case_converter(self.target_config.field_name.clone()), ); env.add_filter("flatten", flatten); + env.add_filter("split_id", split_id); // env.add_filter("unique_attributes", extensions::unique_attributes); // env.add_filter("instrument", extensions::instrument); @@ -421,6 +422,17 @@ fn flatten(value: Value) -> Result { Ok(Value::from(result)) } +// Helper function to take an "id" and split it by '.' into namespaces. 
+fn split_id(value: Value) -> Result { + match value.as_str() { + Some(id) => { + let values: Vec = id.split(".").map(|s| Value::from_safe_string(s.to_owned())).collect(); + Ok(Value::from_iterator(values.into_iter())) + }, + None => Err(minijinja::Error::new(minijinja::ErrorKind::InvalidOperation, format!("Expected string, found: {value}"))), + } +} + #[cfg(test)] mod tests { use std::collections::HashSet; From 244d11e4338385bea927f81648086fbe15976709 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 21:29:03 +0000 Subject: [PATCH 3/8] chore(deps): bump minijinja from 1.0.17 to 1.0.20 Bumps [minijinja](https://github.com/mitsuhiko/minijinja) from 1.0.17 to 1.0.20. - [Release notes](https://github.com/mitsuhiko/minijinja/releases) - [Changelog](https://github.com/mitsuhiko/minijinja/blob/main/CHANGELOG.md) - [Commits](https://github.com/mitsuhiko/minijinja/compare/1.0.17...1.0.20) --- updated-dependencies: - dependency-name: minijinja dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/weaver_forge/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6d27ec9..7cf554e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2144,9 +2144,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "minijinja" -version = "1.0.17" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b7eb3568385ed7a6594dd0b27b8fb52a8d952b80bf3c9b6bc65027ecf004923" +checksum = "fb5c5e3d2b4c0a6832bd3d571f7c19a7c1c1f05f11a6e85ae1a29f76be5f9455" dependencies = [ "aho-corasick", "memo-map", diff --git a/crates/weaver_forge/Cargo.toml b/crates/weaver_forge/Cargo.toml index d9bbbd2f..fa10fd63 100644 --- a/crates/weaver_forge/Cargo.toml +++ b/crates/weaver_forge/Cargo.toml @@ -18,7 +18,7 @@ weaver_resolver = { path = "../weaver_resolver" } weaver_resolved_schema = { path = "../weaver_resolved_schema" } weaver_semconv = { path = "../weaver_semconv" } -minijinja = { version = "1.0.17", features = ["loader", "custom_syntax", "debug"] } +minijinja = { version = "1.0.20", features = ["loader", "custom_syntax", "debug"] } convert_case = "0.6.0" globset = { version = "0.4.14", features = ["serde1"] } jaq-core = "1.2.1" From 6c2ce555db955d0d15399ec761734d37eff01fd1 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Thu, 11 Apr 2024 08:55:29 -0400 Subject: [PATCH 4/8] Fix test suite for running on windows --- .../attribute_group/attributes_jvm_memory.md | 2 +- .../attribute_group/registry_db.md | 21 +--------- .../attribute_group/registry_http.md | 8 +--- .../attribute_group/registry_network.md | 3 +- .../attribute_group/registry_url.md | 2 +- .../attribute_group/registry_user_agent.md | 3 +- crates/weaver_forge/src/lib.rs | 41 ++++++++++++------- .../templates/test/attribute_group.md | 4 +- .../templates/test/attribute_type.j2 | 2 +- crates/weaver_forge/templates/test/group.md | 2 +- 10 files 
changed, 37 insertions(+), 51 deletions(-) diff --git a/crates/weaver_forge/expected_output/attribute_group/attributes_jvm_memory.md b/crates/weaver_forge/expected_output/attribute_group/attributes_jvm_memory.md index 9efa5f68..5f3764f9 100644 --- a/crates/weaver_forge/expected_output/attribute_group/attributes_jvm_memory.md +++ b/crates/weaver_forge/expected_output/attribute_group/attributes_jvm_memory.md @@ -1,4 +1,4 @@ -## Group `attributes.jvm.memory` (attribute_group) +## Group `attributes_jvm_memory` (attribute_group) ### Brief diff --git a/crates/weaver_forge/expected_output/attribute_group/registry_db.md b/crates/weaver_forge/expected_output/attribute_group/registry_db.md index c01af75e..f1d35ed0 100644 --- a/crates/weaver_forge/expected_output/attribute_group/registry_db.md +++ b/crates/weaver_forge/expected_output/attribute_group/registry_db.md @@ -1,4 +1,4 @@ -## Group `registry.db` (attribute_group) +## Group `registry_db` (attribute_group) ### Brief @@ -14,7 +14,6 @@ prefix: db The data center of the coordinating node for a query. - - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -28,7 +27,6 @@ The data center of the coordinating node for a query. The ID of the coordinating node for a query. - - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -42,7 +40,6 @@ The ID of the coordinating node for a query. The consistency level of the query. Based on consistency values from [CQL](https://docs.datastax.com/en/cassandra-oss/3.0/cassandra/dml/dmlConfigConsistency.html). - - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -55,7 +52,6 @@ The consistency level of the query. Based on consistency values from [CQL](https Whether or not the query is idempotent. - - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -68,7 +64,6 @@ Whether or not the query is idempotent. The fetch size used for paging, i.e. how many rows will be returned at once. 
- - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -84,7 +79,6 @@ The fetch size used for paging, i.e. how many rows will be returned at once. The number of times a query was speculatively executed. Not set or `0` if the query was not executed speculatively. - - Requirement Level: Recommended - Tag: tech-specific-cassandra @@ -116,7 +110,6 @@ This mirrors the db.sql.table attribute but references cassandra rather than sql The connection string used to connect to the database. It is recommended to remove embedded credentials. - - Requirement Level: Recommended - Tag: db-generic @@ -240,7 +233,6 @@ Cosmos DB sub status code. Represents the identifier of an Elasticsearch cluster. - - Requirement Level: Recommended - Tag: tech-specific-elasticsearch @@ -256,7 +248,6 @@ Represents the identifier of an Elasticsearch cluster. Represents the human-readable identifier of the node/instance to which a request was routed. - - Requirement Level: Recommended - Tag: tech-specific-elasticsearch @@ -272,7 +263,6 @@ Represents the human-readable identifier of the node/instance to which a request A dynamic value in the url path. - Many Elasticsearch url paths allow dynamic values. These SHOULD be recorded in span attributes in the format `db.elasticsearch.path_parts.`, where `` is the url path part name. The implementation SHOULD reference the [elasticsearch schema](https://raw.githubusercontent.com/elastic/elasticsearch-specification/main/output/schema/schema.json) in order to map the path part values to their names. - Requirement Level: Recommended @@ -291,7 +281,6 @@ Many Elasticsearch url paths allow dynamic values. These SHOULD be recorded in s The fully-qualified class name of the [Java Database Connectivity (JDBC)](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) driver used to connect. 
- - Requirement Level: Recommended - Tag: tech-specific-jdbc @@ -308,7 +297,6 @@ The fully-qualified class name of the [Java Database Connectivity (JDBC)](https: The MongoDB collection being accessed within the database stated in `db.name`. - - Requirement Level: Recommended - Tag: tech-specific-mongodb @@ -325,7 +313,6 @@ The MongoDB collection being accessed within the database stated in `db.name`. The Microsoft SQL Server [instance name](https://docs.microsoft.com/sql/connect/jdbc/building-the-connection-url?view=sql-server-ver15) connecting to. This name is used to determine the port of a named instance. - If setting a `db.mssql.instance_name`, `server.port` is no longer required (but still recommended if non-standard). - Requirement Level: Recommended @@ -341,7 +328,6 @@ If setting a `db.mssql.instance_name`, `server.port` is no longer required (but This attribute is used to report the name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails). - In some SQL databases, the database name to be used is called "schema name". In case there are multiple layers that could be considered for database name (e.g. Oracle instance name and schema name), the database name to be used is the more specific layer (e.g. Oracle schema name). - Requirement Level: Recommended @@ -360,7 +346,6 @@ In some SQL databases, the database name to be used is called "schema name". In The name of the operation being executed, e.g. the [MongoDB command name](https://docs.mongodb.com/manual/reference/command/#database-operations) such as `findAndModify`, or the SQL keyword. - When setting this to an SQL keyword, it is not recommended to attempt any client-side parsing of `db.statement` just to get this property, but it should be set if the operation name is provided by the library being instrumented. 
If the SQL statement has an ambiguous operation, or performs more than one operation, this value may be omitted. - Requirement Level: Recommended @@ -380,7 +365,6 @@ When setting this to an SQL keyword, it is not recommended to attempt any client The index of the database being accessed as used in the [`SELECT` command](https://redis.io/commands/select), provided as an integer. To be used instead of the generic `db.name` attribute. - - Requirement Level: Recommended - Tag: tech-specific-redis @@ -416,7 +400,6 @@ It is not recommended to attempt any client-side parsing of `db.statement` just The database statement being executed. - - Requirement Level: Recommended - Tag: db-generic @@ -445,7 +428,6 @@ An identifier for the database management system (DBMS) product being used. See Username for accessing the database. - - Requirement Level: Recommended - Tag: db-generic @@ -462,7 +444,6 @@ Username for accessing the database. An identifier (address, unique name, or any other identifier) of the database instance that is executing queries or mutations on the current connection. This is useful in cases where the database is running in a clustered environment and the instrumentation is able to record the node executing the query. The client may obtain this value in databases like MySQL using queries like `select @@hostname`. - - Requirement Level: Recommended - Tag: db-generic diff --git a/crates/weaver_forge/expected_output/attribute_group/registry_http.md b/crates/weaver_forge/expected_output/attribute_group/registry_http.md index 95315bcb..19880802 100644 --- a/crates/weaver_forge/expected_output/attribute_group/registry_http.md +++ b/crates/weaver_forge/expected_output/attribute_group/registry_http.md @@ -1,4 +1,4 @@ -## Group `registry.http` (attribute_group) +## Group `registry_http` (attribute_group) ### Brief @@ -14,7 +14,6 @@ prefix: http The size of the request payload body in bytes. 
This is the number of bytes transferred excluding headers and is often, but not always, present as the [Content-Length](https://www.rfc-editor.org/rfc/rfc9110.html#field.content-length) header. For requests using transport encoding, this should be the compressed size. - - Requirement Level: Recommended - Type: int @@ -28,7 +27,6 @@ The size of the request payload body in bytes. This is the number of bytes trans HTTP request headers, `` being the normalized HTTP Header name (lowercase), the value being the header values. - Instrumentations SHOULD require an explicit configuration of which headers are to be captured. Including all request headers can be a security risk - explicit configuration helps avoid leaking sensitive information. The `User-Agent` header is already captured in the `user_agent.original` attribute. Users MAY explicitly configure instrumentations to capture them even though it is not recommended. The attribute value MUST consist of either multiple header values as an array of strings or a single-item array containing a possibly comma-concatenated string, depending on the way the HTTP library provides access to headers. @@ -98,7 +96,6 @@ Original HTTP method sent by the client in the request line. The ordinal number of request resending attempt (for any reason, including redirects). - The resend count SHOULD be updated each time an HTTP request gets resent by the client, regardless of what was the cause of the resending (e.g. redirection, authorization failure, 503 Server Unavailable, network issues, or any other). - Requirement Level: Recommended @@ -114,7 +111,6 @@ The resend count SHOULD be updated each time an HTTP request gets resent by the The size of the response payload body in bytes. This is the number of bytes transferred excluding headers and is often, but not always, present as the [Content-Length](https://www.rfc-editor.org/rfc/rfc9110.html#field.content-length) header. 
For requests using transport encoding, this should be the compressed size. - - Requirement Level: Recommended - Type: int @@ -128,7 +124,6 @@ The size of the response payload body in bytes. This is the number of bytes tran HTTP response headers, `` being the normalized HTTP Header name (lowercase), the value being the header values. - Instrumentations SHOULD require an explicit configuration of which headers are to be captured. Including all response headers can be a security risk - explicit configuration helps avoid leaking sensitive information. Users MAY explicitly configure instrumentations to capture them even though it is not recommended. The attribute value MUST consist of either multiple header values as an array of strings or a single-item array containing a possibly comma-concatenated string, depending on the way the HTTP library provides access to headers. @@ -164,7 +159,6 @@ The attribute value MUST consist of either multiple header values as an array of The matched route, that is, the path template in the format used by the respective server framework. - MUST NOT be populated when this is not supported by the HTTP server framework as the route attribute should have low-cardinality and the URI path can NOT substitute it. SHOULD include the [application root](/docs/http/http-spans.md#http-server-definitions) if there is one. diff --git a/crates/weaver_forge/expected_output/attribute_group/registry_network.md b/crates/weaver_forge/expected_output/attribute_group/registry_network.md index cf198ab4..70efc78d 100644 --- a/crates/weaver_forge/expected_output/attribute_group/registry_network.md +++ b/crates/weaver_forge/expected_output/attribute_group/registry_network.md @@ -1,4 +1,4 @@ -## Group `registry.network` (attribute_group) +## Group `registry_network` (attribute_group) ### Brief @@ -176,7 +176,6 @@ Version of the protocol specified in `network.protocol.name`. 
[OSI transport layer](https://osi-model.com/transport-layer/) or [inter-process communication method](https://wikipedia.org/wiki/Inter-process_communication). - The value SHOULD be normalized to lowercase. Consider always setting the transport when setting a port number, since diff --git a/crates/weaver_forge/expected_output/attribute_group/registry_url.md b/crates/weaver_forge/expected_output/attribute_group/registry_url.md index 43298ed5..f7dfd4a8 100644 --- a/crates/weaver_forge/expected_output/attribute_group/registry_url.md +++ b/crates/weaver_forge/expected_output/attribute_group/registry_url.md @@ -1,4 +1,4 @@ -## Group `registry.url` (attribute_group) +## Group `registry_url` (attribute_group) ### Brief diff --git a/crates/weaver_forge/expected_output/attribute_group/registry_user_agent.md b/crates/weaver_forge/expected_output/attribute_group/registry_user_agent.md index e552920c..0b45e086 100644 --- a/crates/weaver_forge/expected_output/attribute_group/registry_user_agent.md +++ b/crates/weaver_forge/expected_output/attribute_group/registry_user_agent.md @@ -1,4 +1,4 @@ -## Group `registry.user_agent` (attribute_group) +## Group `registry_user_agent` (attribute_group) ### Brief @@ -14,7 +14,6 @@ prefix: user_agent Value of the [HTTP User-Agent](https://www.rfc-editor.org/rfc/rfc9110.html#field.user-agent) header sent by the client. 
- - Requirement Level: Recommended - Type: string diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 4c43cdcc..caded469 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -426,10 +426,16 @@ fn flatten(value: Value) -> Result { fn split_id(value: Value) -> Result { match value.as_str() { Some(id) => { - let values: Vec = id.split(".").map(|s| Value::from_safe_string(s.to_owned())).collect(); + let values: Vec = id + .split(".") + .map(|s| Value::from_safe_string(s.to_owned())) + .collect(); Ok(Value::from_iterator(values.into_iter())) - }, - None => Err(minijinja::Error::new(minijinja::ErrorKind::InvalidOperation, format!("Expected string, found: {value}"))), + } + None => Err(minijinja::Error::new( + minijinja::ErrorKind::InvalidOperation, + format!("Expected string, found: {value}"), + )), } } @@ -521,8 +527,10 @@ mod tests { // Compare files in both sets for file in expected_files.intersection(&observed_files) { - let file1_content = fs::read_to_string(expected_dir.as_ref().join(file))?; - let file2_content = fs::read_to_string(observed_dir.as_ref().join(file))?; + let file1_content = + fs::read_to_string(expected_dir.as_ref().join(file))?.replace("\r\n", "\n"); + let file2_content = + fs::read_to_string(observed_dir.as_ref().join(file))?.replace("\r\n", "\n"); if file1_content != file2_content { are_identical = false; @@ -539,18 +547,23 @@ mod tests { break; } } - // If any file is unique to one directory, they are not identical - if !expected_files + let not_in_observed = expected_files .difference(&observed_files) - .collect::>() - .is_empty() - || !observed_files - .difference(&expected_files) - .collect::>() - .is_empty() - { + .collect::>(); + if !not_in_observed.is_empty() { + are_identical = false; + eprintln!("Observed output is missing files: {:?}", not_in_observed); + } + let not_in_expected = observed_files + .difference(&expected_files) + .collect::>(); + if !not_in_expected.is_empty() 
{ are_identical = false; + eprintln!( + "Observed output has unexpected files: {:?}", + not_in_expected + ); } Ok(are_identical) diff --git a/crates/weaver_forge/templates/test/attribute_group.md b/crates/weaver_forge/templates/test/attribute_group.md index 1fed8404..80fb05f0 100644 --- a/crates/weaver_forge/templates/test/attribute_group.md +++ b/crates/weaver_forge/templates/test/attribute_group.md @@ -1,7 +1,7 @@ {%- set file_name = ctx.id | file_name -%} {{- template.set_file_name("attribute_group/" ~ file_name ~ ".md") -}} -## Group `{{ ctx.id }}` ({{ ctx.type }}) +## Group `{{ ctx.id | split_id | list | join("_") }}` ({{ ctx.type }}) ### Brief @@ -14,7 +14,7 @@ prefix: {{ ctx.prefix }} {% for attribute in ctx.attributes %} #### Attribute `{{ attribute.name }}` -{{ attribute.brief }} +{{ attribute.brief | trim }} {% if attribute.note %} {{ attribute.note | trim }} diff --git a/crates/weaver_forge/templates/test/attribute_type.j2 b/crates/weaver_forge/templates/test/attribute_type.j2 index 1c9147ba..55c57e4b 100644 --- a/crates/weaver_forge/templates/test/attribute_type.j2 +++ b/crates/weaver_forge/templates/test/attribute_type.j2 @@ -1,5 +1,5 @@ {%- if attribute.type is mapping %} -- Type: Enum [{{ attribute.type.members | map(attribute="value") | join(", ") }}] +- Type: Enum [{{ attribute.type.members | map(attribute="value") | join(", ") | trim }}] {%- else %} - Type: {{ attribute.type }} {%- endif %} \ No newline at end of file diff --git a/crates/weaver_forge/templates/test/group.md b/crates/weaver_forge/templates/test/group.md index 74a7953d..1d14bc87 100644 --- a/crates/weaver_forge/templates/test/group.md +++ b/crates/weaver_forge/templates/test/group.md @@ -47,7 +47,7 @@ prefix: {{ ctx.prefix }} ## Lineage -Source file: {{ ctx.lineage.source_file }} +Source file: {{ ctx.lineage.source_file | replace("\\", "/") }} {% for item in ctx.lineage.attributes -%} attribute: {{ item.id }} From f9bd33db6debc2d5ad902965cd3a1d7d1b048cc8 Mon Sep 17 00:00:00 2001 
From: Josh Suereth Date: Thu, 11 Apr 2024 13:19:43 -0400 Subject: [PATCH 5/8] Fixes from code review. --- crates/weaver_forge/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index caded469..7e9332eb 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -423,14 +423,14 @@ fn flatten(value: Value) -> Result { } // Helper function to take an "id" and split it by '.' into namespaces. -fn split_id(value: Value) -> Result { +fn split_id(value: Value) -> Result, minijinja::Error> { match value.as_str() { Some(id) => { let values: Vec = id - .split(".") + .split('.') .map(|s| Value::from_safe_string(s.to_owned())) .collect(); - Ok(Value::from_iterator(values.into_iter())) + Ok(values) } None => Err(minijinja::Error::new( minijinja::ErrorKind::InvalidOperation, From b33000b2d8e76b6adb70dad1add605215c2037f3 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Thu, 11 Apr 2024 17:14:18 -0400 Subject: [PATCH 6/8] Update crates/weaver_forge/src/lib.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Laurent Quérel --- crates/weaver_forge/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 7e9332eb..86f2022c 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -430,7 +430,7 @@ fn split_id(value: Value) -> Result, minijinja::Error> { .split('.') .map(|s| Value::from_safe_string(s.to_owned())) .collect(); - Ok(values) + Ok(Value::from(values)) } None => Err(minijinja::Error::new( minijinja::ErrorKind::InvalidOperation, From b5a90bd4406993943280329ac3d0bd689b46e752 Mon Sep 17 00:00:00 2001 From: Josh Suereth Date: Thu, 11 Apr 2024 19:11:46 -0400 Subject: [PATCH 7/8] Revert "Update crates/weaver_forge/src/lib.rs" This reverts commit b33000b2d8e76b6adb70dad1add605215c2037f3. 
--- crates/weaver_forge/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 86f2022c..7e9332eb 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -430,7 +430,7 @@ fn split_id(value: Value) -> Result, minijinja::Error> { .split('.') .map(|s| Value::from_safe_string(s.to_owned())) .collect(); - Ok(Value::from(values)) + Ok(values) } None => Err(minijinja::Error::new( minijinja::ErrorKind::InvalidOperation, From 57e6f1add0cb1c86554599332e6cc86f576fbea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20Qu=C3=A9rel?= Date: Thu, 11 Apr 2024 22:55:12 -0700 Subject: [PATCH 8/8] feat(policy): Introduce policy engine for semantic convention registry checks This commit introduces a new policy engine, `weaver_checker`, to validate semantic convention registries against custom rules defined using the Rego language. The engine offers enhanced flexibility and control over registry compliance checks. Key improvements: - Policy engine implementation: A new crate, weaver_checker, was created to wrap the `regorus` crate and provide a convenient interface for defining and executing `Rego` policies against semantic convention registries. - Policy separation: Policy checks are now decoupled from registry loading for improved performance and maintainability. - Policy examples: Example `Rego` policies for comparing released and unreleased versions of semantic conventions are included for reference and demonstration. - Documentation: Comprehensive documentation on the policy engine and its usage is added, including updates to the otel-weaver-platform diagram. - Unit tests and coverage: Unit tests were added to ensure the functionality of the policy engine, and test coverage was improved to meet project criteria. - Registry check command: A new CLI option `--before-resolution-policies` allows applying policies before schema resolution. 
--- .github/workflows/publish-docker.yml | 2 +- .gitignore | 6 +- Cargo.lock | 483 ++++++++++++++++-- Cargo.toml | 11 +- README.md | 10 +- crates/weaver_checker/Cargo.toml | 45 ++ crates/weaver_checker/README.md | 296 +++++++++++ .../allowed-external-types.toml | 8 + .../data/policies/invalid_policies.rego | 34 ++ .../policies/invalid_violation_object.rego | 73 +++ .../data/policies/otel_policies.rego | 79 +++ .../data/registries/registry.network.new.yaml | 22 + .../data/registries/registry.network.old.yaml | 20 + crates/weaver_checker/src/lib.rs | 259 ++++++++++ crates/weaver_checker/src/violation.rs | 53 ++ crates/weaver_forge/Cargo.toml | 2 +- crates/weaver_forge/src/lib.rs | 5 +- crates/weaver_resolved_schema/Cargo.toml | 2 +- crates/weaver_resolver/Cargo.toml | 3 +- .../allowed-external-types.toml | 1 + crates/weaver_resolver/src/lib.rs | 243 ++++----- crates/weaver_resolver/src/registry.rs | 10 +- crates/weaver_semconv/src/lib.rs | 23 +- crates/weaver_semconv_gen/Cargo.toml | 1 - .../allowed-external-types.toml | 1 - crates/weaver_semconv_gen/src/lib.rs | 33 +- deny.toml | 2 + docs/images/dependencies.svg | 392 +++++++------- docs/images/otel-weaver-platform.svg | 1 + schemas/otel_policies.rego | 91 ++++ src/error.rs | 77 +++ src/main.rs | 1 + src/registry/check.rs | 51 +- src/registry/generate.rs | 66 +-- src/registry/mod.rs | 134 ++++- src/registry/resolve.rs | 22 +- src/registry/stats.rs | 38 +- src/registry/update_markdown.rs | 7 +- src/resolve.rs | 28 +- src/search/mod.rs | 26 +- tests/resolution_process.rs | 24 +- 41 files changed, 2127 insertions(+), 558 deletions(-) create mode 100644 crates/weaver_checker/Cargo.toml create mode 100644 crates/weaver_checker/README.md create mode 100644 crates/weaver_checker/allowed-external-types.toml create mode 100644 crates/weaver_checker/data/policies/invalid_policies.rego create mode 100644 crates/weaver_checker/data/policies/invalid_violation_object.rego create mode 100644 
crates/weaver_checker/data/policies/otel_policies.rego create mode 100644 crates/weaver_checker/data/registries/registry.network.new.yaml create mode 100644 crates/weaver_checker/data/registries/registry.network.old.yaml create mode 100644 crates/weaver_checker/src/lib.rs create mode 100644 crates/weaver_checker/src/violation.rs create mode 100644 docs/images/otel-weaver-platform.svg create mode 100644 schemas/otel_policies.rego create mode 100644 src/error.rs diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml index af33b388..c90ed4cd 100644 --- a/.github/workflows/publish-docker.yml +++ b/.github/workflows/publish-docker.yml @@ -10,7 +10,7 @@ on: paths: - .github/workflows/publish-docker.yml - 'src/**' - - 'creates/**' + - 'crates/**' jobs: tests: diff --git a/.gitignore b/.gitignore index 742dedd1..d3ff7e18 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,8 @@ just.zsh # test generated code **/observed_output/* -/output \ No newline at end of file +# Ignore output files generated by weaver +**/output/* + +# Coverage results +lcov.info \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 7cf554e8..9cc953f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,7 @@ dependencies = [ "cfg-if", "getrandom", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -110,9 +111,12 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +dependencies = [ + "backtrace", +] [[package]] name = "arc-swap" @@ -128,13 +132,13 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -164,6 +168,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + [[package]] name = "bincode" version = "1.3.3" @@ -173,6 +183,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -216,9 +241,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.4" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecount" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "byteorder" @@ -255,9 +286,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.90" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum 
= "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" dependencies = [ "jobserver", "libc", @@ -283,7 +314,9 @@ checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", + "wasm-bindgen", "windows-targets 0.52.4", ] @@ -349,7 +382,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -370,6 +403,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "compact-rc" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf2bdc97c915ed231cf450d1dc4f7293b6135a46834f886f23cfdf8ac2ba4a23" + [[package]] name = "compact_str" version = "0.7.1" @@ -383,6 +422,32 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "convert_case" version = "0.6.0" @@ -544,7 +609,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -555,9 +620,15 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", 
"quote", - "syn 2.0.57", + "syn 2.0.58", ] +[[package]] +name = "data-encoding" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" + [[package]] name = "deranged" version = "0.3.11" @@ -582,6 +653,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", + "subtle", ] [[package]] @@ -607,9 +679,9 @@ dependencies = [ [[package]] name = "downcast-rs" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "dunce" @@ -631,9 +703,9 @@ checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encoding_rs" -version = "0.8.33" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] @@ -654,6 +726,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fastdivide" version = "0.4.1" @@ -709,6 +791,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "fs4" version = "0.6.6" @@ -798,13 +890,15 @@ dependencies = [ 
[[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1181,7 +1275,7 @@ checksum = "1dff438f14e67e7713ab9332f5fd18c8f20eb7eb249494f6c2bf170522224032" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -1473,7 +1567,7 @@ version = "0.41.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf8e5f72ec9cad9ee44714b9a4ec7427b540a2418b62111f5e3a715bebe1ed9d" dependencies = [ - "base64", + "base64 0.21.7", "bstr", "gix-command", "gix-credentials", @@ -1607,9 +1701,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -1652,12 +1746,27 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "hifijson" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18ae468bcb4dfecf0e4949ee28abbc99076b6a0077f51ddbc94dbfff8e6a870c" +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + 
[[package]] name = "home" version = "0.5.9" @@ -1859,6 +1968,15 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "iso8601" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924e5d73ea28f59011fec52a0d12185d496a9b075d360657aed2a5707f701153" +dependencies = [ + "nom", +] + [[package]] name = "itertools" version = "0.11.0" @@ -1890,7 +2008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03d6a5713b8f33675abfac79d1db0022a3f28764b2a6b96a185c199ad8dab86d" dependencies = [ "aho-corasick", - "base64", + "base64 0.21.7", "hifijson", "jaq-interpret", "libm", @@ -1947,9 +2065,9 @@ dependencies = [ [[package]] name = "jobserver" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +checksum = "f08474e32172238f2827bd160c67871cdb2801430f65c3979184dc362e3ca118" dependencies = [ "libc", ] @@ -1963,6 +2081,49 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" +dependencies = [ + "ahash", + "anyhow", + "base64 0.21.7", + "bytecount", + "fancy-regex", + "fraction", + "getrandom", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot", + "percent-encoding", + "regex", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + +[[package]] +name = "jsonwebtoken" +version = "9.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ae10193d25051e74945f1ea2d0b42e03cc3b890f7e4cc5faa44997d808193f" +dependencies = [ + "base64 0.21.7", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + 
[[package]] name = "jwalk" version = "0.8.1" @@ -2093,7 +2254,17 @@ checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", ] [[package]] @@ -2152,6 +2323,7 @@ dependencies = [ "memo-map", "self_cell", "serde", + "serde_json", ] [[package]] @@ -2207,12 +2379,84 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.18" @@ -2341,6 +2585,16 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "pem" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" +dependencies = [ + "base64 0.22.0", + "serde", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2378,7 +2632,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -2448,6 +2702,15 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "pori" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a63d338dec139f56dacc692ca63ad35a6be6a797442479b55acd611d79e906" +dependencies = [ + "nom", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2505,9 +2768,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -2685,6 +2948,40 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "regorus" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f21e15237fe2834687a9063a19f7cf03d4e57188aad587ce855b1c1f723495" +dependencies = [ + "anyhow", + "chrono", + "chrono-tz", + "compact-rc", + "constant_time_eq", + "data-encoding", + "hex", + "hmac", + "itertools 0.12.1", + "jsonschema", + "jsonwebtoken", + "lazy_static", + "md-5", + "num", + "rand 0.8.5", + "regex", + "scientific", + "semver", + "serde", + "serde_json", + "serde_yaml", + "sha1", + "sha2", + "url", + "uuid", + "wax", +] + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -2700,7 +2997,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -2817,7 +3114,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -2849,9 +3146,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" [[package]] name = "ryu" @@ -2868,6 +3165,26 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scientific" +version = "0.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc53198b8e237c451c68dba8411a1f8bd92787657689f24d67ae3d6b98c39f59" +dependencies = [ + "scientific-macro", +] + +[[package]] +name = "scientific-macro" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ee4885492bb655bfa05d039cd9163eb8fe9f79ddebf00ca23a1637510c2fd2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.58", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -2922,7 +3239,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -2970,6 +3287,17 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha1_smol" version = "1.0.0" @@ -3038,6 +3366,18 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +[[package]] +name = "simple_asn1" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" +dependencies = [ + "num-bigint", + "num-traits", + "thiserror", + "time", +] + [[package]] name = "siphasher" version = "0.3.11" @@ -3153,7 +3493,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -3175,9 +3515,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.57" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" +checksum = 
"44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -3220,7 +3560,7 @@ dependencies = [ "aho-corasick", "arc-swap", "async-trait", - "base64", + "base64 0.21.7", "bitpacking", "byteorder", "census", @@ -3423,7 +3763,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -3438,9 +3778,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.34" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "itoa", @@ -3461,9 +3801,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ "num-conv", "time-core", @@ -3582,7 +3922,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -3655,9 +3995,9 @@ checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" [[package]] name = "uluru" -version = "3.0.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794a32261a1f5eb6a4462c81b59cec87b5c27d5deea7dd1ac8fc781c41d226db" +checksum = "7c8a2469e56e6e5095c82ccd3afb98dad95f7af7929aab6d8ba8d6e0f73657da" dependencies = [ "arrayvec", ] @@ -3757,6 +4097,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -3775,7 +4121,7 @@ version = "2.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" dependencies = [ - "base64", + "base64 0.21.7", "flate2", "log", "once_cell", @@ -3823,6 +4169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "getrandom", + "rand 0.8.5", "serde", ] @@ -3853,7 +4200,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] @@ -3914,7 +4261,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -3948,7 +4295,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3959,6 +4306,20 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wax" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d12a78aa0bab22d2f26ed1a96df7ab58e8a93506a3e20adb47c51a93b4e1357" +dependencies = [ + "const_format", + "itertools 0.11.0", + "nom", + "pori", + "regex", + "thiserror", +] + [[package]] name = "weaver" version = "0.1.0" @@ -3966,6 +4327,7 @@ dependencies = [ "clap", "crossterm", "ratatui", + "rayon", "serde", "serde_json", "serde_yaml", @@ -3973,7 +4335,7 @@ dependencies = [ "tui-textarea", "walkdir", 
"weaver_cache", - "weaver_diff", + "weaver_checker", "weaver_forge", "weaver_logger", "weaver_resolved_schema", @@ -3994,6 +4356,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "weaver_checker" +version = "0.1.0" +dependencies = [ + "regorus", + "serde", + "serde_json", + "serde_yaml", + "thiserror", +] + [[package]] name = "weaver_diff" version = "0.1.0" @@ -4058,6 +4431,7 @@ dependencies = [ "url", "walkdir", "weaver_cache", + "weaver_checker", "weaver_logger", "weaver_resolved_schema", "weaver_schema", @@ -4100,7 +4474,6 @@ dependencies = [ "thiserror", "weaver_cache", "weaver_diff", - "weaver_logger", "weaver_resolved_schema", "weaver_resolver", "weaver_semconv", @@ -4343,9 +4716,9 @@ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winnow" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" +checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352" dependencies = [ "memchr", ] @@ -4385,7 +4758,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn 2.0.58", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 99d0ae03..70d2320f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ rust-version = "1.76" [workspace.dependencies] serde = { version = "1.0.197", features = ["derive"] } serde_yaml = "0.9.32" -serde_json = "1.0.115" +serde_json = { version = "1.0.115"} thiserror = "1.0.58" ureq = "2.9.6" regex = "1.10.3" @@ -49,7 +49,6 @@ name = "weaver" [dependencies] # local crates dependencies -weaver_diff = { path = "crates/weaver_diff" } weaver_logger = { path = "crates/weaver_logger" } weaver_resolver = { path = "crates/weaver_resolver" } weaver_template = { path = "crates/weaver_template" } @@ -59,6 +58,7 @@ weaver_semconv_gen = { path = 
"crates/weaver_semconv_gen" } weaver_schema = { path = "crates/weaver_schema" } weaver_cache = { path = "crates/weaver_cache" } weaver_forge = { path = "crates/weaver_forge" } +weaver_checker = { path = "crates/weaver_checker" } clap = { version = "4.5.4", features = ["derive"] } crossterm = "0.27.0" @@ -71,12 +71,7 @@ serde.workspace = true serde_yaml.workspace = true serde_json.workspace = true walkdir.workspace = true - -[package.metadata.cargo-machete] -# force cargo machete to ignore the following crates -# Remove this section one weaver_semconv_gen is ready to be used -# in the project -ignored = ["weaver_semconv_gen"] +rayon = "1.10.0" [profile.release] lto = true diff --git a/README.md b/README.md index d230b820..b11308fc 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,10 @@ Options: Local path or Git URL of the semantic convention registry to check [default: https://github.com/open-telemetry/semantic-conventions.git] -d, --registry-git-sub-dir Optional path in the Git repository where the semantic convention registry is located [default: model] + -b, --before-resolution-policies + Optional list of policy files to check against the files of the semantic convention registry before the resolution process + -h, --help + Print help ``` ### Sub-Command `registry generate` @@ -178,7 +182,8 @@ description and the current status of each crate: | [weaver_resolver](crates/weaver_resolver/README.md) | Telemetry Schema Resolution Process + Lineage | Work-In-Progress | | [weaver_cache](crates/weaver_cache/README.md) | Telemetry Schema and Semantic Convention Registry Cache | Work-In-Progress | | [weaver_logger](crates/weaver_logger/README.md) | Generic logger supported colorized output | Alpha | -| [weaver_forge](crates/weaver_forge/README.md) | Template engine used to generate artifacts from any serde json value | Alpha; Need more tests | +| [weaver_forge](crates/weaver_forge/README.md) | Template engine used to generate artifacts from any serde json value | Alpha 
| +| [weaver_checker](crates/weaver_checker/README.md) | Policy engine to enforce policies on telemetry data | Alpha | | xtask | Set of tasks to validate the project | Done | Note 1: Alpha status means that the crate is in a usable state but may have @@ -198,13 +203,14 @@ documentation generation, dashboard creation, and more. Below is a diagram detailing the primary components of the OpenTelemetry Weaver tool. -![OpenTelemetry Weaver Platform](docs/images/otel-weaver-platform.png) +![OpenTelemetry Weaver Platform](docs/images/otel-weaver-platform.svg) ## Links Internal links: - [Template Engine](docs/template-engine.md) +- [Policy Engine](crates/weaver_checker/README.md) - [Component Telemetry Schema](docs/component-telemetry-schema.md) (proposal) - [Resolved Telemetry Schema](docs/resolved-telemetry-schema.md) (proposal) - [Internal crates interdependencies](docs/dependencies.md) diff --git a/crates/weaver_checker/Cargo.toml b/crates/weaver_checker/Cargo.toml new file mode 100644 index 00000000..e47a2e9d --- /dev/null +++ b/crates/weaver_checker/Cargo.toml @@ -0,0 +1,45 @@ +[package] +name = "weaver_checker" +version = "0.1.0" +authors.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +edition.workspace = true +rust-version.workspace = true + + +[lints] +workspace = true + + +[dependencies] + +thiserror.workspace = true +serde.workspace = true +serde_json.workspace = true +serde_yaml.workspace = true + +regorus = { version = "0.1.3", default-features = false, features = [ + "arc", + "base64", + "base64url", + "coverage", + "crypto", + "deprecated", + "glob", + "graph", + "hex", + "http", + "jwt", + "jsonschema", + "regex", + "semver", + "time", + "uuid", + "urlquery", + "yaml" +]} + +[dev-dependencies] +# Required for testing \ No newline at end of file diff --git a/crates/weaver_checker/README.md b/crates/weaver_checker/README.md new file mode 100644 index 00000000..5b091811 --- /dev/null +++ 
b/crates/weaver_checker/README.md @@ -0,0 +1,296 @@ +# Weaver Policy Engine + +- [Overview](#overview) +- [Objectives](#objectives) +- [Policy Engine Features](#policy-engine-features) +- [Implementation](#implementation) + - [Policy Definition and Verification](#policy-definition-and-verification) + - [Usage](#usage) + - [Policy Examples](#policy-examples) +- [Creating Rules for Violation Detection](#creating-rules-for-violation-detection) + - [Understanding the `deny` Rule](#understanding-the-deny-rule) + - [Key Concepts for Rule Development](#key-concepts-for-rule-development) + - [Step-by-Step Guide to Creating a New Rule](#step-by-step-guide-to-creating-a-new-rule) +- [Links](#links) + +## Overview +The Weaver Policy Engine has been developed to enhance the management, +evolution, and maintainability of semantic conventions and application +telemetry schemas. It leverages a set of rules or policies to ensure the +coherence and quality of these conventions and schemas over time. This +documentation outlines the implemented features of the Weaver Policy Engine, +highlighting its goals, implementation details, and how it operates. + +## Objectives +The primary objective of the Weaver Policy Engine is to automate the +verification of policies related to semantic conventions and telemetry schemas +before the publication of a new version. These policies aim to maintain the +long-term integrity, coherence, and quality of these conventions and schemas. + +Example of policies: +- Prohibiting the use of attributes marked as deprecated unless the 'stability' + field is set to 'deprecated'. +- Disallowing attributes with high cardinality. +- Prohibiting optional attributes as required by some environments. +- Preventing name changes. +- Preventing the removal of attributes from metrics. +- Requiring the inclusion of 'owners' and 'contacts' fields for metrics, spans, + and traces. 
+ +## Policy Engine Features +- **Decoupled Policy Management**: Policies are defined in separate Rego files, + allowing for easy updates, extensions, and customization. +- **Automated Verification**: Integration into CI/CD pipelines automates the + policy verification process, enhancing consistency and reliability. +- **Support for OpenTelemetry and Custom Policies**: The engine supports both + generic OpenTelemetry policies and company-specific policies, offering + flexibility in policy enforcement. +- **Enhanced Auditability**: By automating policy checks and maintaining + policies in versioned files, the Weaver Policy Engine improves audit trails and + compliance tracking. + +## Implementation + +### Policy Definition and Verification +The Weaver Policy Engine utilizes the '[Rego](https://www.openpolicyagent.org/docs/latest/policy-language/)' +language, popularized by the [Open Policy Agent](https://www.openpolicyagent.org/) (OPA) project, +for expressing and enforcing policies in a declarative manner. Policies are +written in Rego and can be stored in the semantic conventions repository or a +company-specific repository for custom policies. These policies are then applied +by the Weaver tool during various phases of the development process. + +The policy verification process involves: +- Reading the semconv files of both the new and previous versions. +- Applying Rego policies to these files to identify violations. +- Displaying any detected policy violations, aiding in the resolution before + publication. + + +### Usage +To verify policies, the command `weaver registry check` can be invoked with one +or more Rego files as parameters. This allows for the specific context-based +verification of policies against semantic conventions and telemetry schemas. + +### Policy Examples + +Example of a policy expressed in `Rego`: +```rego +package otel + +# Conventions for OTel: +# - `data` holds the current released semconv, which is known to be valid. 
+# - `input` holds the new candidate semconv version, whose validity is unknown. +# +# Note: `data` and `input` are predefined variables in Rego. + +# ========= Violation rules applied on unresolved semconv files ========= + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid. +deny[attr_registry_violation("registry_with_ref_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null +} + +# An attribute whose stability is not `deprecated` but has the deprecated field +# set to true is invalid. +deny[attr_violation("attr_stability_deprecated", group.id, attr.id)] { + group := input.groups[_] + attr := group.attributes[_] + attr.stability != "deprecated" + attr.deprecated +} + +# An attribute cannot be removed from a group that has already been released. +deny[schema_evolution_violation("attr_removed", old_group.id, old_attr.id)] { + old_group := data.groups[_] + old_attr := old_group.attributes[_] + not attr_exists_in_new_group(old_group.id, old_attr.id) +} + + +# ========= Helper functions ========= + +# Check if an attribute from the old group exists in the new +# group's attributes +attr_exists_in_new_group(group_id, attr_id) { + new_group := input.groups[_] + new_group.id == group_id + attr := new_group.attributes[_] + attr.id == attr_id +} + +# Build an attribute registry violation +attr_registry_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute_registry", + "group": group_id, + "attr": attr_id, + } +} + +# Build an attribute violation +attr_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute", + "group": group_id, + "attr": attr_id, + } +} + +# Build a schema evolution violation +schema_evolution_violation(violation_id, group_id, 
attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "schema_evolution", + "group": group_id, + "attr": attr_id, + } +} +``` + +These policies applied to the following semconv files... + +The already released version (data): +```yaml +groups: + - id: registry.network1 + prefix: network + type: attribute_group + brief: > + These attributes may be used for any network related operation. + attributes: + - id: protocol.name + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + deprecated: true + - id: protocol.name.3 + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] +``` + +The unreleased version (input): +```yaml +groups: + - id: registry.network1 + prefix: network + type: attribute_group + brief: > + These attributes may be used for any network related operation. + attributes: + - id: protocol.name + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + deprecated: true + - id: protocol.name.2 + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + - ref: protocol.port + deprecated: true +``` + +... will generate the following violations. 
+ +```json +[ + { + "type": "semconv_attribute", + "id": "attr_stability_deprecated", + "category": "attrigute", + "group": "registry.network1", + "attr": "protocol.name" + }, + { + "type": "semconv_attribute", + "id": "attr_removed", + "category": "schema_evolution", + "group": "registry.network1", + "attr": "protocol.name.3" + }, + { + "type": "semconv_attribute", + "id": "registry_with_ref_attr", + "category": "attrigute_registry", + "group": "registry.network1", + "attr": "protocol.port" + } +] +``` + +## Creating Rules for Violation Detection + +The Weaver Policy Engine allows for the dynamic creation and enforcement of +rules to maintain the integrity and consistency of semantic conventions and +telemetry schemas. By leveraging the Rego language, developers can specify +policies that define what constitutes a violation within these domains. This +section explains how to craft rules for detecting new violations, enhancing the +engine's capability to safeguard the quality and coherence of semantic +conventions and application telemetry schemas. + +### Understanding the `deny` Rule + +The `deny` rule serves as the cornerstone for defining policy violations. When +the conditions specified within a `deny` rule are met, it indicates a policy +violation. Each `deny` rule must uniquely identify the violation it detects by +producing a descriptive message or a structured object that outlines the nature +of the violation. + +### Key Concepts for Rule Development + +- **Rule Name**: Rules detecting violations must be named `deny`. This is a +convention chosen by the Weaver project to facilitate the use and management of +these policy files. +- **Violation Conditions**: The body of a `deny` rule contains one or more +conditions that, when true, signal a violation. These conditions can range from +simple checks, like the presence of a deprecated attribute, to complex validations +involving multiple components of the semantic conventions or telemetry schemas. 
+- **Violation Message**: Upon detecting a violation, the rule should generate a +message or an object that provides detailed information about the violation, +including the type of violation, relevant identifiers (e.g., attribute or group +ID), and a brief description of the issue. + +### Step-by-Step Guide to Creating a New Rule + +1. **Identify the Violation**: Determine the specific condition or practice that +should be flagged as a violation. This could be a new policy requirement, a best +practice, or an identified gap in the current policy enforcement. + +2. **Define the Rule Conditions**: Craft a set of conditions that accurately +capture the criteria for the violation. Utilize the Rego language to express +these conditions, making use of variables, functions, and operators as necessary. + +3. **Construct the Violation Output**: For defining the structure of the violation +object, you can either reuse one of the already defined categories (i.e., +attr_registry_violation, attr_violation, schema_evolution_violation), or define +a new one if the category of this violation has not already been defined. + +4. **Implement the Rule in Rego**: Write the `deny` rule in Rego, encapsulating +the conditions and violation output you've defined. Ensure the rule is clearly +commented and documented to facilitate understanding and maintenance. + +5. **Test the Rule**: Before integrating the new rule into the Weaver Policy +Engine, test it with various input scenarios to ensure it accurately detects +violations without producing false positives or negatives. A unit test +framework will be provided to facilitate this process in a future PR. + +## Links +- [Rego Language Reference](https://www.openpolicyagent.org/docs/latest/policy-language/). 
\ No newline at end of file diff --git a/crates/weaver_checker/allowed-external-types.toml b/crates/weaver_checker/allowed-external-types.toml new file mode 100644 index 00000000..0f78677b --- /dev/null +++ b/crates/weaver_checker/allowed-external-types.toml @@ -0,0 +1,8 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 +# This is used with cargo-check-external-types to reduce the surface area of downstream crates from +# the public API. Ideally this can have as few exceptions as possible. +allowed_external_types = [ + "serde::ser::Serialize", + "serde::de::Deserialize" +] \ No newline at end of file diff --git a/crates/weaver_checker/data/policies/invalid_policies.rego b/crates/weaver_checker/data/policies/invalid_policies.rego new file mode 100644 index 00000000..16393c53 --- /dev/null +++ b/crates/weaver_checker/data/policies/invalid_policies.rego @@ -0,0 +1,34 @@ +package otel + +# Conventions for OTel: +# - `data` holds the current released semconv, which is known to be valid. +# - `input` holds the new candidate semconv version, whose validity is unknown. +# +# Note: `data` and `input` are predefined variables in Rego. + +# ========= Violation rules applied on unresolved semconv files ========= + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid. +deny[attr_registry_violation("registry_with_ref_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null +} + +# An attribute whose stability is not `deprecated` but has the deprecated field +# set to true is invalid. +deny[attr_violation("attr_stability_deprecated", group.id, attr.id)] { + group := input.groups[_] + attr := group.attributes[_] + attr.stability != "deprecaded" + attr.deprecated +} + +# An attribute cannot be removed from a group that has already been released. 
+deny[schema_evolution_violation("attr_removed", old_group.id, old_attr.id)] { + old_group := data.groups[_] + old_attr := old_group.attributes[_] + not attr_exists_in_new_group(old_group.id, old_attr.id) +} \ No newline at end of file diff --git a/crates/weaver_checker/data/policies/invalid_violation_object.rego b/crates/weaver_checker/data/policies/invalid_violation_object.rego new file mode 100644 index 00000000..9613446e --- /dev/null +++ b/crates/weaver_checker/data/policies/invalid_violation_object.rego @@ -0,0 +1,73 @@ +package otel + +# Conventions for OTel: +# - `data` holds the current released semconv, which is known to be valid. +# - `input` holds the new candidate semconv version, whose validity is unknown. +# +# Note: `data` and `input` are predefined variables in Rego. + +# ========= Violation rules applied on unresolved semconv files ========= + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid. +deny[attr_registry_violation("registry_with_ref_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null +} + +# An attribute whose stability is not `deprecated` but has the deprecated field +# set to true is invalid. +deny[attr_violation("attr_stability_deprecated", group.id, attr.id)] { + group := input.groups[_] + attr := group.attributes[_] + attr.stability != "deprecaded" + attr.deprecated +} + +# An attribute cannot be removed from a group that has already been released. 
+deny[schema_evolution_violation("attr_removed", old_group.id, old_attr.id)] { + old_group := data.groups[_] + old_attr := old_group.attributes[_] + not attr_exists_in_new_group(old_group.id, old_attr.id) +} + + +# ========= Helper functions ========= + +# Check if an attribute from the old group exists in the new +# group's attributes +attr_exists_in_new_group(group_id, attr_id) { + new_group := input.groups[_] + new_group.id == group_id + attr := new_group.attributes[_] + attr.id == attr_id +} + +# Build an attribute registry violation +attr_registry_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "group": group_id, + "attr": attr_id, + } +} + +# Build an attribute violation +attr_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "group": group_id, + "attr": attr_id, + } +} + +# Build a schema evolution violation +schema_evolution_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "group": group_id, + "attr": attr_id, + } +} \ No newline at end of file diff --git a/crates/weaver_checker/data/policies/otel_policies.rego b/crates/weaver_checker/data/policies/otel_policies.rego new file mode 100644 index 00000000..66184d3c --- /dev/null +++ b/crates/weaver_checker/data/policies/otel_policies.rego @@ -0,0 +1,79 @@ +package otel + +# Conventions for OTel: +# - `data` holds the current released semconv, which is known to be valid. +# - `input` holds the new candidate semconv version, whose validity is unknown. +# +# Note: `data` and `input` are predefined variables in Rego. + +# ========= Violation rules applied on unresolved semconv files ========= + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid. 
+deny[attr_registry_violation("registry_with_ref_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null +} + +# An attribute whose stability is not `deprecated` but has the deprecated field +# set to true is invalid. +deny[attr_violation("attr_stability_deprecated", group.id, attr.id)] { + group := input.groups[_] + attr := group.attributes[_] + attr.stability != "deprecaded" + attr.deprecated +} + +# An attribute cannot be removed from a group that has already been released. +deny[schema_evolution_violation("attr_removed", old_group.id, old_attr.id)] { + old_group := data.groups[_] + old_attr := old_group.attributes[_] + not attr_exists_in_new_group(old_group.id, old_attr.id) +} + + +# ========= Helper functions ========= + +# Check if an attribute from the old group exists in the new +# group's attributes +attr_exists_in_new_group(group_id, attr_id) { + new_group := input.groups[_] + new_group.id == group_id + attr := new_group.attributes[_] + attr.id == attr_id +} + +# Build an attribute registry violation +attr_registry_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute_registry", + "group": group_id, + "attr": attr_id, + } +} + +# Build an attribute violation +attr_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute", + "group": group_id, + "attr": attr_id, + } +} + +# Build a schema evolution violation +schema_evolution_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "schema_evolution", + "group": group_id, + "attr": attr_id, + } +} \ No newline at end of file diff --git a/crates/weaver_checker/data/registries/registry.network.new.yaml 
b/crates/weaver_checker/data/registries/registry.network.new.yaml new file mode 100644 index 00000000..5b068366 --- /dev/null +++ b/crates/weaver_checker/data/registries/registry.network.new.yaml @@ -0,0 +1,22 @@ +groups: + - id: registry.network1 + prefix: network + type: attribute_group + brief: > + These attributes may be used for any network related operation. + attributes: + - id: protocol.name + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + deprecated: true + - id: protocol.name.2 + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + - ref: protocol.port + deprecated: true \ No newline at end of file diff --git a/crates/weaver_checker/data/registries/registry.network.old.yaml b/crates/weaver_checker/data/registries/registry.network.old.yaml new file mode 100644 index 00000000..0fc51694 --- /dev/null +++ b/crates/weaver_checker/data/registries/registry.network.old.yaml @@ -0,0 +1,20 @@ +groups: + - id: registry.network1 + prefix: network + type: attribute_group + brief: > + These attributes may be used for any network related operation. + attributes: + - id: protocol.name + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + deprecated: true + - id: protocol.name.3 + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. 
+ examples: ['amqp', 'http', 'mqtt'] \ No newline at end of file diff --git a/crates/weaver_checker/src/lib.rs b/crates/weaver_checker/src/lib.rs new file mode 100644 index 00000000..f550f413 --- /dev/null +++ b/crates/weaver_checker/src/lib.rs @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! This crate integrates a general purpose policy engine with the Weaver +//! project. The project `regorus` is the policy engine used in this crate to +//! evaluate policies. + +use crate::violation::Violation; +use serde::Serialize; +use serde_json::to_value; +use std::path::Path; + +pub mod violation; + +/// An error that can occur while evaluating policies. +#[derive(thiserror::Error, Debug)] +#[must_use] +#[non_exhaustive] +pub enum Error { + /// An invalid policy. + #[error("Invalid policy file '{file}', error: {error})")] + InvalidPolicyFile { + /// The file that caused the error. + file: String, + /// The error that occurred. + error: String, + }, + + /// An invalid data. + #[error("Invalid data, error: {error})")] + InvalidData { + /// The error that occurred. + error: String, + }, + + /// An invalid input. + #[error("Invalid input, error: {error})")] + InvalidInput { + /// The error that occurred. + error: String, + }, + + /// Violation evaluation error. + #[error("Violation evaluation error: {error}")] + ViolationEvaluationError { + /// The error that occurred. + error: String, + }, + + /// A container for multiple errors. + #[error("{:?}", Error::format_errors(.0))] + CompoundError(Vec), +} + +impl Error { + /// Formats the given errors into a single string. + /// This is used to render compound errors. + #[must_use] + pub fn format_errors(errors: &[Error]) -> String { + errors + .iter() + .map(|e| e.to_string()) + .collect::>() + .join("\n\n") + } +} + +/// The policy engine. +#[derive(Clone, Default)] +pub struct Engine { + // The `regorus` policy engine. + engine: regorus::Engine, +} + +impl Engine { + /// Creates a new policy engine. 
+ #[must_use] + pub fn new() -> Self { + Default::default() + } + + /// Adds a policy file to the policy engine. + /// A policy file is a `rego` file that contains the policies to be evaluated. + /// + /// # Arguments + /// + /// * `policy_path` - The path to the policy file. + pub fn add_policy>(&mut self, policy_path: P) -> Result<(), Error> { + let policy_path_str = policy_path.as_ref().to_string_lossy().to_string(); + + self.engine + .add_policy_from_file(policy_path) + .map_err(|e| Error::InvalidPolicyFile { + file: policy_path_str.clone(), + error: e.to_string(), + }) + } + + /// Adds a data document to the policy engine. + /// + /// Data versus Input: In essence, data is about what the policy engine + /// knows globally and statically (or what is updated dynamically but + /// considered part of policy engine's world knowledge), while input is + /// about what each request or query brings to the policy engine at + /// runtime, needing a decision based on current, external circumstances. + /// Combining data and input allows the policy engine to make informed, + /// context-aware decisions based on both its internal knowledge base and + /// the specifics of each request or action being evaluated. + pub fn add_data(&mut self, data: &T) -> Result<(), Error> { + let json_data = to_value(data).map_err(|e| Error::InvalidData { + error: e.to_string(), + })?; + let value: regorus::Value = + serde_json::from_value(json_data).map_err(|e| Error::InvalidInput { + error: e.to_string(), + })?; + self.engine.add_data(value).map_err(|e| Error::InvalidData { + error: e.to_string(), + }) + } + + /// Sets an input document for the policy engine. 
+ /// + /// Data versus Input: In essence, data is about what the policy engine + /// knows globally and statically (or what is updated dynamically but + /// considered part of policy engine's world knowledge), while input is + /// about what each request or query brings to the policy engine at + /// runtime, needing a decision based on current, external circumstances. + /// Combining data and input allows the policy engine to make informed, + /// context-aware decisions based on both its internal knowledge base and + /// the specifics of each request or action being evaluated. + pub fn set_input(&mut self, input: &T) -> Result<(), Error> { + let json_input = to_value(input).map_err(|e| Error::InvalidInput { + error: e.to_string(), + })?; + + let value: regorus::Value = + serde_json::from_value(json_input).map_err(|e| Error::InvalidInput { + error: e.to_string(), + })?; + self.engine.set_input(value); + Ok(()) + } + + /// Returns a list of violations based on the policies, the data, and the + /// input. 
+ pub fn check(&mut self) -> Result, Error> { + let value = self + .engine + .eval_rule("data.otel.deny".to_owned()) + .map_err(|e| Error::ViolationEvaluationError { + error: e.to_string(), + })?; + + // convert `regorus` value to `serde_json` value + let json_value = to_value(&value).map_err(|e| Error::ViolationEvaluationError { + error: e.to_string(), + })?; + + // convert json value into a vector of violations + let violations: Vec = + serde_json::from_value(json_value).map_err(|e| Error::ViolationEvaluationError { + error: e.to_string(), + })?; + + Ok(violations) + } +} + +#[cfg(test)] +mod tests { + use crate::violation::Violation; + use crate::{Engine, Error}; + use serde_yaml::Value; + use std::collections::HashMap; + + #[test] + fn test_policy() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.add_policy("data/policies/otel_policies.rego")?; + + let old_semconv = std::fs::read_to_string("data/registries/registry.network.old.yaml")?; + let old_semconv: Value = serde_yaml::from_str(&old_semconv)?; + engine.add_data(&old_semconv)?; + + let new_semconv = std::fs::read_to_string("data/registries/registry.network.new.yaml")?; + let new_semconv: Value = serde_yaml::from_str(&new_semconv)?; + engine.set_input(&new_semconv)?; + + let expected_violations: HashMap = vec![ + Violation::SemconvAttribute { + id: "attr_stability_deprecated".to_owned(), + category: "attrigute".to_owned(), + group: "registry.network1".to_owned(), + attr: "protocol.name".to_owned(), + }, + Violation::SemconvAttribute { + id: "attr_removed".to_owned(), + category: "schema_evolution".to_owned(), + group: "registry.network1".to_owned(), + attr: "protocol.name.3".to_owned(), + }, + Violation::SemconvAttribute { + id: "registry_with_ref_attr".to_owned(), + category: "attrigute_registry".to_owned(), + group: "registry.network1".to_owned(), + attr: "protocol.port".to_owned(), + }, + ] + .into_iter() + .map(|v| (v.id().to_owned(), v)) + .collect(); + + let violations = 
engine.check()?; + assert_eq!(violations.len(), 3); + + for violation in violations { + assert_eq!(expected_violations.get(violation.id()), Some(&violation)); + println!("{}", violation); + } + + Ok(()) + } + + #[test] + fn test_invalid_policy() { + let mut engine = Engine::new(); + let result = engine.add_policy("data/policies/invalid_policy.rego"); + assert!(result.is_err()); + } + + #[test] + fn test_invalid_data() { + let mut engine = Engine::new(); + let result = engine.add_data(&"invalid data"); + assert!(result.is_err()); + } + + #[test] + fn test_invalid_violation_object() { + let mut engine = Engine::new(); + engine + .add_policy("data/policies/invalid_violation_object.rego") + .unwrap(); + + let new_semconv = + std::fs::read_to_string("data/registries/registry.network.new.yaml").unwrap(); + let new_semconv: Value = serde_yaml::from_str(&new_semconv).unwrap(); + engine.set_input(&new_semconv).unwrap(); + + let result = engine.check(); + assert!(result.is_err()); + + let observed_errors = Error::format_errors(&[result.unwrap_err()]); + assert_eq!( + observed_errors, + "Violation evaluation error: missing field `type`" + ); + } +} diff --git a/crates/weaver_checker/src/violation.rs b/crates/weaver_checker/src/violation.rs new file mode 100644 index 00000000..eca21d06 --- /dev/null +++ b/crates/weaver_checker/src/violation.rs @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Definition of a policy violation. + +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; + +/// Enum representing the different types of violations. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", rename_all = "snake_case")] +#[serde(deny_unknown_fields)] +pub enum Violation { + /// A violation related to semantic convention attributes. + SemconvAttribute { + /// The ID of the policy violation. + id: String, + /// The category of the policy violation. 
+ category: String, + /// The semconv group where the violation occurred. + group: String, + /// The semconv attribute where the violation occurred. + attr: String, + }, +} + +impl Display for Violation { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Violation::SemconvAttribute { + id, + category, + group, + attr, + } => { + write!( + f, + "id={}, category={}, group={}, attr={}", + id, category, group, attr + ) + } + } + } +} + +impl Violation { + /// Returns the violation id. + #[must_use] + pub fn id(&self) -> &str { + match self { + Violation::SemconvAttribute { id, .. } => id, + } + } +} diff --git a/crates/weaver_forge/Cargo.toml b/crates/weaver_forge/Cargo.toml index fa10fd63..4b5559c3 100644 --- a/crates/weaver_forge/Cargo.toml +++ b/crates/weaver_forge/Cargo.toml @@ -18,7 +18,7 @@ weaver_resolver = { path = "../weaver_resolver" } weaver_resolved_schema = { path = "../weaver_resolved_schema" } weaver_semconv = { path = "../weaver_semconv" } -minijinja = { version = "1.0.20", features = ["loader", "custom_syntax", "debug"] } +minijinja = { version = "1.0.20", features = ["loader", "custom_syntax", "debug", "json"] } convert_case = "0.6.0" globset = { version = "0.4.14", features = ["serde1"] } jaq-core = "1.2.1" diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 7e9332eb..04b200b6 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -464,9 +464,8 @@ mod tests { let registry_id = "default"; let mut registry = SemConvRegistry::try_from_path(registry_id, "data/*.yaml") .expect("Failed to load registry"); - let schema = - SchemaResolver::resolve_semantic_convention_registry(&mut registry, logger.clone()) - .expect("Failed to resolve registry"); + let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) + .expect("Failed to resolve registry"); let template_registry = TemplateRegistry::try_from_resolved_registry( 
schema.registry(registry_id).expect("registry not found"), diff --git a/crates/weaver_resolved_schema/Cargo.toml b/crates/weaver_resolved_schema/Cargo.toml index 6ff65bc6..6ab55312 100644 --- a/crates/weaver_resolved_schema/Cargo.toml +++ b/crates/weaver_resolved_schema/Cargo.toml @@ -20,4 +20,4 @@ serde.workspace = true ordered-float.workspace = true [dev-dependencies] -serde_json = "1.0.114" \ No newline at end of file +serde_json.workspace = true diff --git a/crates/weaver_resolver/Cargo.toml b/crates/weaver_resolver/Cargo.toml index 61c4d242..48b8a3c9 100644 --- a/crates/weaver_resolver/Cargo.toml +++ b/crates/weaver_resolver/Cargo.toml @@ -18,15 +18,16 @@ weaver_schema = { path = "../weaver_schema" } weaver_version = { path = "../weaver_version" } weaver_cache = { path = "../weaver_cache" } weaver_resolved_schema = { path = "../weaver_resolved_schema" } +weaver_checker = { path = "../weaver_checker" } regex.workspace = true thiserror.workspace = true rayon.workspace = true +serde.workspace = true serde_json.workspace = true walkdir.workspace = true url = "2.5.0" -serde = { version = "1.0.197", features = ["derive"] } [dev-dependencies] glob = "0.3.1" \ No newline at end of file diff --git a/crates/weaver_resolver/allowed-external-types.toml b/crates/weaver_resolver/allowed-external-types.toml index 7d571217..8dc26035 100644 --- a/crates/weaver_resolver/allowed-external-types.toml +++ b/crates/weaver_resolver/allowed-external-types.toml @@ -11,4 +11,5 @@ allowed_external_types = [ "weaver_cache::Cache", "weaver_schema::*", "weaver_version::*", + "weaver_checker::*" ] \ No newline at end of file diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 4ade21b4..ca1b1280 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -4,24 +4,23 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering::Relaxed; use 
std::time::Instant; -use rayon::iter::IntoParallelRefIterator; +use rayon::iter::ParallelBridge; use rayon::iter::ParallelIterator; use regex::Regex; use url::Url; use walkdir::DirEntry; use weaver_cache::Cache; +use weaver_checker::violation::Violation; use weaver_logger::Logger; use weaver_resolved_schema::catalog::Catalog; use weaver_resolved_schema::registry::Constraint; use weaver_resolved_schema::ResolvedTelemetrySchema; use weaver_schema::TelemetrySchema; use weaver_semconv::path::RegistryPath; -use weaver_semconv::{ResolverConfig, SemConvRegistry, SemConvSpec, SemConvSpecWithProvenance}; +use weaver_semconv::{ResolverConfig, SemConvRegistry, SemConvSpec}; use weaver_version::VersionChanges; use crate::attribute::AttributeCatalog; @@ -158,6 +157,15 @@ pub enum Error { path: PathBuf, }, + /// A policy violation error. + #[error("Policy violation: {violation}, provenance: {provenance}")] + PolicyViolation { + /// The provenance of the violation (URL or path). + provenance: String, + /// The violation. + violation: Violation, + }, + /// A container for multiple errors. #[error("{:?}", Error::format_errors(.0))] CompoundError(Vec), @@ -207,6 +215,19 @@ impl Error { .collect::>() .join("\n\n") } + + /// Logs one or multiple errors (if current error is a `CompoundError`) + /// using the given logger. 
+ pub fn log(&self, logger: impl Logger + Clone + Sync) { + match self { + Error::CompoundError(errors) => { + for error in errors { + error.log(logger.clone()); + } + } + _ => logger.error(&self.to_string()), + } + } } impl SchemaResolver { @@ -301,28 +322,19 @@ impl SchemaResolver { cache: &Cache, log: impl Logger + Clone + Sync, ) -> Result { + let registry_path = RegistryPath::GitUrl { + git_url: registry_git_url, + path, + }; + let semconv_specs = Self::load_semconv_specs(&registry_path, cache)?; Self::semconv_registry_from_imports( registry_id, - &[RegistryPath::GitUrl { - git_url: registry_git_url, - path, - }], + semconv_specs, ResolverConfig::default(), - cache, log.clone(), ) } - /// Loads a semantic convention registry from the given Git URL. - pub fn load_semconv_registry( - registry_id: &str, - registry_path: RegistryPath, - cache: &Cache, - log: impl Logger + Clone + Sync, - ) -> Result { - Self::load_semconv_registry_from_imports(registry_id, &[registry_path], cache, log.clone()) - } - /// Loads a telemetry schema from the given URL or path. pub fn load_schema( schema_url_or_path: &str, @@ -382,47 +394,34 @@ impl SchemaResolver { cache: &Cache, log: impl Logger + Clone + Sync, ) -> Result { + let mut errors = vec![]; + let mut semconv_specs = vec![]; + + for registry_path in schema.merged_semantic_conventions() { + match Self::load_semconv_specs(&registry_path, cache) { + Ok(specs) => semconv_specs.extend(specs), + Err(e) => errors.push(e), + } + } + handle_errors(errors)?; + Self::semconv_registry_from_imports( registry_id, - &schema.merged_semantic_conventions(), + semconv_specs, ResolverConfig::default(), - cache, log.clone(), ) } - /// Loads a semantic convention registry from the given semantic convention imports. 
- pub fn load_semconv_registry_from_imports( - registry_id: &str, - imports: &[RegistryPath], - cache: &Cache, - log: impl Logger + Clone + Sync, - ) -> Result { - let start = Instant::now(); - let registry = - Self::create_semantic_convention_registry(registry_id, imports, cache, log.clone())?; - log.success(&format!( - "Loaded {} semantic convention files containing the definition of {} attributes and {} metrics ({:.2}s)", - registry.asset_count(), - registry.attribute_count(), - registry.metric_count(), - start.elapsed().as_secs_f32() - )); - - Ok(registry) - } - /// Loads a semantic convention registry from the given semantic convention imports. pub fn semconv_registry_from_imports( registry_id: &str, - imports: &[RegistryPath], + semconv_specs: Vec<(String, SemConvSpec)>, resolver_config: ResolverConfig, - cache: &Cache, log: impl Logger + Clone + Sync, ) -> Result { let start = Instant::now(); - let mut registry = - Self::create_semantic_convention_registry(registry_id, imports, cache, log.clone())?; + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let warnings = registry .resolve(resolver_config) .map_err(|e| Error::SemConvError { @@ -447,10 +446,7 @@ impl SchemaResolver { /// corresponding resolved telemetry schema. pub fn resolve_semantic_convention_registry( registry: &mut SemConvRegistry, - log: impl Logger + Clone + Sync, ) -> Result { - let start = Instant::now(); - let mut attr_catalog = AttributeCatalog::default(); let resolved_registry = resolve_semconv_registry(&mut attr_catalog, "", registry)?; @@ -472,14 +468,6 @@ impl SchemaResolver { versions: None, // ToDo LQ: Implement this! 
}; - log.success(&format!( - "Resolved {} semantic convention files containing the definition of {} attributes and {} metrics ({:.2}s)", - registry.asset_count(), - registry.attribute_count(), - registry.metric_count(), - start.elapsed().as_secs_f32() - )); - Ok(resolved_schema) } @@ -535,97 +523,45 @@ impl SchemaResolver { Ok(parent_schema) } - /// Creates a semantic convention registry from the given telemetry schema. - fn create_semantic_convention_registry( - registry_id: &str, - sem_convs: &[RegistryPath], - cache: &Cache, - log: impl Logger + Sync, - ) -> Result { - // Load all the semantic convention catalogs. - let mut sem_conv_catalog = SemConvRegistry::new(registry_id); - let total_file_count = sem_convs.len(); - let loaded_files_count = AtomicUsize::new(0); - let error_count = AtomicUsize::new(0); - - let result: Vec> = sem_convs - .par_iter() - .flat_map(|sem_conv_import| { - let results = Self::import_sem_conv_specs(sem_conv_import, cache); - for result in results.iter() { - if result.is_err() { - _ = error_count.fetch_add(1, Relaxed); - } - _ = loaded_files_count.fetch_add(1, Relaxed); - if error_count.load(Relaxed) == 0 { - log.loading(&format!( - "Loaded {}/{} semantic convention files (no error detected)", - loaded_files_count.load(Relaxed), - total_file_count - )); - } else { - log.loading(&format!( - "Loaded {}/{} semantic convention files ({} error(s) detected)", - loaded_files_count.load(Relaxed), - total_file_count, - error_count.load(Relaxed) - )); - } - } - results - }) - .collect(); - - let mut errors = vec![]; - result.into_iter().for_each(|result| match result { - Ok((provenance, spec)) => { - sem_conv_catalog - .append_sem_conv_spec(SemConvSpecWithProvenance { provenance, spec }); - } - Err(e) => { - log.error(&e.to_string()); - errors.push(e); - } - }); - - // ToDo LQ: Propagate the errors! - - Ok(sem_conv_catalog) - } - - /// Imports the semantic convention specifications from the given registry path. 
- /// This function returns a vector of results because the import declaration can be a - /// URL or a git URL (containing potentially multiple semantic convention specifications). - fn import_sem_conv_specs( + /// Loads the semantic convention specifications from the given registry path. + /// Implementation note: semconv files are read and parsed in parallel and + /// all errors are collected and returned as a compound error. + /// + /// # Arguments + /// * `registry_path` - The registry path containing the semantic convention files. + /// * `cache` - The cache to store the semantic convention files. + pub fn load_semconv_specs( registry_path: &RegistryPath, cache: &Cache, - ) -> Vec> { + ) -> Result, Error> { match registry_path { RegistryPath::Local { local_path: path } => { - Self::import_semconv_from_local_path(path.into(), path) + Self::load_semconv_from_local_path(path.into(), path) } RegistryPath::GitUrl { git_url, path } => { match cache.git_repo(git_url.clone(), path.clone()) { Ok(local_git_repo) => { - Self::import_semconv_from_local_path(local_git_repo, git_url) + Self::load_semconv_from_local_path(local_git_repo, git_url) } - Err(e) => vec![Err(Error::SemConvError { + Err(e) => Err(Error::SemConvError { message: e.to_string(), - })], + }), } } } } - /// Imports the semantic convention specifications from the given local path. + /// Loads the semantic convention specifications from the given local path. + /// Implementation note: semconv files are read and parsed in parallel and + /// all errors are collected and returned as a compound error. /// /// # Arguments /// * `local_path` - The local path containing the semantic convention files. /// * `registry_path_repr` - The representation of the registry path (URL or path). 
- fn import_semconv_from_local_path( + fn load_semconv_from_local_path( local_path: PathBuf, registry_path_repr: &str, - ) -> Vec> { + ) -> Result, Error> { fn is_hidden(entry: &DirEntry) -> bool { entry .file_name() @@ -642,22 +578,25 @@ impl SchemaResolver { && file_name != "schema-next.yaml" } - let mut result = vec![]; - // Loads the semantic convention specifications from the git repo. - // All yaml files are recursively loaded from the given path. - for entry in walkdir::WalkDir::new(local_path.clone()) + // All yaml files are recursively loaded and parsed in parallel from + // the given path. + let result = walkdir::WalkDir::new(local_path.clone()) .into_iter() .filter_entry(|e| !is_hidden(e)) - { - match entry { - Ok(entry) => { - if is_semantic_convention_file(&entry) { + .par_bridge() + .filter_map(|entry| { + match entry { + Ok(entry) => { + if !is_semantic_convention_file(&entry) { + return None; + } + let spec = SemConvRegistry::load_sem_conv_spec_from_file(entry.path()) .map_err(|e| Error::SemConvError { message: e.to_string(), }); - result.push(match spec { + match spec { Ok((path, spec)) => { // Replace the local path with the git URL combined with the relative path // of the semantic convention file. 
@@ -667,19 +606,33 @@ impl SchemaResolver { .unwrap_or_default(); let path = format!("{}/{}", registry_path_repr, &path[prefix.len() + 1..]); - Ok((path, spec)) + Some(Ok((path, spec))) } - Err(e) => Err(e), - }); + Err(e) => Some(Err(e)), + } } + Err(e) => Some(Err(Error::SemConvError { + message: e.to_string(), + })), } - Err(e) => result.push(Err(Error::SemConvError { - message: e.to_string(), - })), - } - } + }) + .collect::>(); + + let mut error = vec![]; + let result = result + .into_iter() + .filter_map(|r| match r { + Ok(r) => Some(r), + Err(e) => { + error.push(e); + None + } + }) + .collect::>(); + + handle_errors(error)?; - result + Ok(result) } } diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 42965932..9e5c43d5 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -706,7 +706,6 @@ mod tests { use glob::glob; use serde::Serialize; - use weaver_logger::TestLogger; use weaver_resolved_schema::attribute; use weaver_resolved_schema::registry::{Constraint, Registry}; use weaver_semconv::group::GroupType; @@ -744,7 +743,7 @@ mod tests { .to_str() .expect("Failed to convert test directory to string"); - // if !test_dir.ends_with("registry-test-lineage-2") { + // if !test_dir.ends_with("registry-test-7-spans") { // // Skip the test for now as it is not yet supported. // continue; // } @@ -857,7 +856,6 @@ mod tests { #[test] fn test_api_usage() -> Result<(), Box> { - let logger = TestLogger::new(); let registry_id = "local"; let registry_dir = "data/registry-test-7-spans/registry/*.yaml"; @@ -867,10 +865,8 @@ mod tests { let mut semconv_registry = SemConvRegistry::try_from_path(registry_id, registry_dir)?; // Resolve the semantic convention registry. 
- let resolved_schema = SchemaResolver::resolve_semantic_convention_registry( - &mut semconv_registry, - logger.clone(), - )?; + let resolved_schema = + SchemaResolver::resolve_semantic_convention_registry(&mut semconv_registry)?; // Get the resolved registry by its ID. let resolved_registry = resolved_schema.registry(registry_id).unwrap(); diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index ee3804f2..0f3bfcb0 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -6,12 +6,12 @@ //! The YAML language syntax used to define a semantic convention file //! can be found [here](https://github.com/open-telemetry/build-tools/blob/main/semantic-conventions/syntax.md). -use glob::glob; use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::BufReader; use std::path::Path; +use glob::glob; use serde::{Deserialize, Serialize}; use validator::Validate; @@ -351,6 +351,27 @@ impl SemConvRegistry { Ok(registry) } + /// Creates a semantic convention registry from the given list of + /// semantic convention specs. + /// + /// # Arguments + /// + /// * `registry_id` - The id of the semantic convention registry. + /// * `semconv_specs` - The list of semantic convention specs to load. + pub fn from_semconv_specs( + registry_id: &str, + semconv_specs: Vec<(String, SemConvSpec)>, + ) -> SemConvRegistry { + // Load all the semantic convention catalogs. + let mut registry = SemConvRegistry::new(registry_id); + + for (provenance, spec) in semconv_specs { + registry.append_sem_conv_spec(SemConvSpecWithProvenance { provenance, spec }); + } + + registry + } + /// Returns the id of the semantic convention registry. 
#[must_use] pub fn id(&self) -> &str { diff --git a/crates/weaver_semconv_gen/Cargo.toml b/crates/weaver_semconv_gen/Cargo.toml index e0b01106..ce26429d 100644 --- a/crates/weaver_semconv_gen/Cargo.toml +++ b/crates/weaver_semconv_gen/Cargo.toml @@ -11,7 +11,6 @@ rust-version.workspace = true [dependencies] weaver_cache = { path = "../weaver_cache" } weaver_diff = { path = "../weaver_diff" } -weaver_logger = { path = "../weaver_logger" } weaver_resolver = { path = "../weaver_resolver" } weaver_resolved_schema = { path = "../weaver_resolved_schema" } weaver_semconv = { path = "../weaver_semconv" } diff --git a/crates/weaver_semconv_gen/allowed-external-types.toml b/crates/weaver_semconv_gen/allowed-external-types.toml index dab741c8..ce8a2b6d 100644 --- a/crates/weaver_semconv_gen/allowed-external-types.toml +++ b/crates/weaver_semconv_gen/allowed-external-types.toml @@ -6,6 +6,5 @@ allowed_external_types = [ "weaver_semconv::Error", "weaver_semconv::path::RegistryPath", "weaver_resolver::Error", - "weaver_logger::Logger", "weaver_cache::Cache", ] diff --git a/crates/weaver_semconv_gen/src/lib.rs b/crates/weaver_semconv_gen/src/lib.rs index 1f0525aa..9473e864 100644 --- a/crates/weaver_semconv_gen/src/lib.rs +++ b/crates/weaver_semconv_gen/src/lib.rs @@ -5,9 +5,9 @@ //! poorly porting the code into RUST. We expect to optimise and improve things over time. use std::fs; + use weaver_cache::Cache; use weaver_diff::diff_output; -use weaver_logger::Logger; use weaver_resolved_schema::attribute::{Attribute, AttributeRef}; use weaver_resolved_schema::registry::{Group, Registry}; use weaver_resolved_schema::ResolvedTelemetrySchema; @@ -15,11 +15,11 @@ use weaver_resolver::SchemaResolver; use weaver_semconv::path::RegistryPath; use weaver_semconv::SemConvRegistry; +use crate::gen::{AttributeTableView, GenerateMarkdownContext, MetricView}; + mod gen; mod parser; -use crate::gen::{AttributeTableView, GenerateMarkdownContext, MetricView}; - /// Errors emitted by this crate. 
#[derive(thiserror::Error, Debug)] #[non_exhaustive] @@ -91,6 +91,7 @@ pub enum MarkdownGenParameters { /// Omit the requirement level. OmitRequirementLevel, } + /// Markdown-snippet generation arguments. pub struct GenerateMarkdownArgs { /// The id of the metric, event, span or attribute group to render. @@ -98,6 +99,7 @@ pub struct GenerateMarkdownArgs { /// Arguments the user specified that we've parsed. args: Vec, } + impl GenerateMarkdownArgs { // Returns true if the `full` flag was specified. fn is_full(&self) -> bool { @@ -207,15 +209,13 @@ pub struct ResolvedSemconvRegistry { schema: ResolvedTelemetrySchema, registry_id: String, } + impl ResolvedSemconvRegistry { /// Resolve the semantic convention registry and make it available for rendering markdown snippets. - pub fn try_from_path( - path_pattern: &str, - log: impl Logger + Clone + Sync, - ) -> Result { + pub fn try_from_path(path_pattern: &str) -> Result { let registry_id = "semantic_conventions"; let mut registry = SemConvRegistry::try_from_path(registry_id, path_pattern)?; - let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry, log)?; + let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry)?; let lookup = ResolvedSemconvRegistry { schema, registry_id: registry_id.into(), @@ -227,12 +227,11 @@ impl ResolvedSemconvRegistry { pub fn try_from_url( registry_path: RegistryPath, cache: &Cache, - log: impl Logger + Clone + Sync, ) -> Result { let registry_id = "semantic_conventions"; - let mut registry = - SchemaResolver::load_semconv_registry(registry_id, registry_path, cache, log.clone())?; - let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry, log)?; + let semconv_specs = SchemaResolver::load_semconv_specs(&registry_path, cache)?; + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry)?; let lookup = 
ResolvedSemconvRegistry { schema, registry_id: registry_id.into(), @@ -257,10 +256,10 @@ impl ResolvedSemconvRegistry { #[cfg(test)] mod tests { - use crate::{update_markdown, Error, ResolvedSemconvRegistry}; use std::fs; use std::path::PathBuf; - use weaver_logger::TestLogger; + + use crate::{update_markdown, Error, ResolvedSemconvRegistry}; fn force_print_error(result: Result) -> T { match result { @@ -271,8 +270,7 @@ mod tests { #[test] fn test_http_semconv() -> Result<(), Error> { - let logger = TestLogger::default(); - let lookup = ResolvedSemconvRegistry::try_from_path("data/**/*.yaml", logger.clone())?; + let lookup = ResolvedSemconvRegistry::try_from_path("data/**/*.yaml")?; let attribute_registry_url = "../attributes-registry"; // Check our test files. force_print_error(update_markdown( @@ -306,9 +304,8 @@ mod tests { } fn run_legacy_test(path: PathBuf) -> Result<(), Error> { - let logger = TestLogger::default(); let semconv_path = format!("{}/*.yaml", path.display()); - let lookup = ResolvedSemconvRegistry::try_from_path(&semconv_path, logger.clone())?; + let lookup = ResolvedSemconvRegistry::try_from_path(&semconv_path)?; let test_path = path.join("test.md").display().to_string(); // Attempts to update the test - will fail if there is any difference in the generated markdown. 
update_markdown(&test_path, &lookup, true, None) diff --git a/deny.toml b/deny.toml index 1e04147a..5d1a8965 100644 --- a/deny.toml +++ b/deny.toml @@ -138,6 +138,8 @@ allow-registry = ["https://github.com/rust-lang/crates.io-index"] # List of URLs for allowed Git repositories allow-git = [ "https://github.com/rust-lang/cargo", + # Will be removed once this issue is resolved: https://github.com/microsoft/regorus/issues/199 + "https://github.com/lquerel/regorus.git" ] [sources.allow-org] diff --git a/docs/images/dependencies.svg b/docs/images/dependencies.svg index 48f6c1b2..13e201fe 100644 --- a/docs/images/dependencies.svg +++ b/docs/images/dependencies.svg @@ -4,273 +4,303 @@ - + - + 0 - -weaver_cache + +weaver_cache 1 - -weaver_forge + +weaver_checker 2 - -weaver_logger - - - -1->2 - - + +weaver_diff 3 - -weaver_resolved_schema + +weaver_forge - - -1->3 - - + + +3->2 + + 4 - -weaver_semconv - - - -1->4 - - - - - -6 - -weaver_resolver - - - -1->6 - - + +weaver_logger - + 3->4 - - + + 5 - -weaver_version + +weaver_resolved_schema - + 3->5 - - + + - - -6->0 - - + + +6 + +weaver_semconv - - -6->2 - - + + +3->6 + + - - -6->3 - - + + +8 + +weaver_resolver - - -6->4 - - + + +3->8 + + - - -6->5 - - + + +5->6 + + 7 - -weaver_schema - - - -6->7 - - - - - -7->4 - - - - - -7->5 - - + +weaver_version - - -8 - -weaver_semconv_gen + + +5->7 + + - + 8->0 - - + + - - -8->2 - - - - - -8->3 - - + + +8->1 + + - + 8->4 - - + + + + + +8->5 + + - + 8->6 - - + + + + + +8->7 + + 9 - -weaver_template + +weaver_schema - - -9->0 - - - - - -9->2 - - + + +8->9 + + - + 9->6 - - + + - + 9->7 - - + + 10 - -xtask + +weaver_semconv_gen + + + +10->0 + + + + + +10->2 + + + + + +10->5 + + + + + +10->6 + + + + + +10->8 + + 11 - -weaver + +weaver_template - + 11->0 - - + + - - -11->1 - - + + +11->4 + + - + + +11->8 + + + + + +11->9 + + + + + +12 + +xtask + + + +13 + +weaver + + -11->2 - - +13->0 + + - + -11->3 - - - - - -11->4 - - +13->1 + + - + -11->6 - - +13->3 + + - + -11->7 - - +13->4 + + - + + 
+13->5 + + + + + +13->6 + + + + -11->8 - - +13->8 + + - + -11->9 - - +13->9 + + + + + +13->10 + + + + + +13->11 + + diff --git a/docs/images/otel-weaver-platform.svg b/docs/images/otel-weaver-platform.svg new file mode 100644 index 00000000..7507b641 --- /dev/null +++ b/docs/images/otel-weaver-platform.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/schemas/otel_policies.rego b/schemas/otel_policies.rego new file mode 100644 index 00000000..d18c5194 --- /dev/null +++ b/schemas/otel_policies.rego @@ -0,0 +1,91 @@ +package otel + +# Conventions for OTel: +# - `data` holds the current released semconv, which is known to be valid. +# - `input` holds the new candidate semconv version, whose validity is unknown. +# +# Note: `data` and `input` are predefined variables in Rego. + +# ========= Violation rules applied on unresolved semconv files ========= + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid. +deny[attr_registry_violation("registry_with_ref_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null +} + +# A registry `attribute_group` marked as `deprecated` must only contain +# attributes marked as `deprecated`. +deny[attr_registry_violation("deprecated_registry_with_regular_attr", group.id, attr.ref)] { + group := input.groups[_] + startswith(group.id, "registry.") + endswith(group.id, ".deprecated") + attr := group.attributes[_] + not attr.deprecated +} + +# An attribute whose stability is not `deprecated` and that is not part of a +# deprecated attribute_group registry but has the deprecated field +# set to true is invalid. 
+deny[attr_violation("attr_stability_deprecated", group.id, attr.id)] { + group := input.groups[_] + not endswith(group.id, ".deprecated") + attr := group.attributes[_] + attr.stability != "deprecated" + attr.deprecated +} + +# An attribute cannot be removed from a group that has already been released. +deny[schema_evolution_violation("attr_removed", old_group.id, old_attr.id)] { + old_group := data.groups[_] + old_attr := old_group.attributes[_] + not attr_exists_in_new_group(old_group.id, old_attr.id) +} + + +# ========= Helper functions ========= + +# Check if an attribute from the old group exists in the new +# group's attributes +attr_exists_in_new_group(group_id, attr_id) { + new_group := input.groups[_] + new_group.id == group_id + attr := new_group.attributes[_] + attr.id == attr_id +} + +# Build an attribute registry violation +attr_registry_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute_registry", + "group": group_id, + "attr": attr_id, + } +} + +# Build an attribute violation +attr_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "attrigute", + "group": group_id, + "attr": attr_id, + } +} + +# Build a schema evolution violation +schema_evolution_violation(violation_id, group_id, attr_id) = violation { + violation := { + "id": violation_id, + "type": "semconv_attribute", + "category": "schema_evolution", + "group": group_id, + "attr": attr_id, + } +} \ No newline at end of file diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..ee6941f0 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! 
Error management + +use std::fmt::Display; +use std::process::exit; + +// Define a trait with the exit_if_error method +pub trait ExitIfError { + /// Call the error handler and exit the process if the result is an error. + /// Otherwise, return the value. + /// + /// # Arguments + /// * `self` - The result to check + /// * `err_handler` - The error handler to call if the result is an error + /// + /// # Returns + /// The value if the result is Ok + fn exit_if_error(self, err_handler: F) -> T; + + /// Call the error handler and exit the process with the given code if the + /// result is an error. + /// + /// # Arguments + /// * `self` - The result to check + /// * `code` - The exit code to use if the result is an error + /// * `err_handler` - The error handler to call if the result is an error + /// + /// # Returns + /// The value if the result is Ok + #[allow(dead_code)] + fn exit_with_code_if_error(self, code: i32, err_handler: F) -> T; +} + +// Implement the trait for all Result where E is an error. +impl ExitIfError for Result { + /// Call the error handler and exit the process if the result is an error. + /// Otherwise, return the value. + /// + /// # Arguments + /// * `self` - The result to check + /// * `err_handler` - The error handler to call if the result is an error + /// + /// # Returns + /// The value if the result is Ok + fn exit_if_error(self, err_handler: F) -> T { + match self { + Ok(value) => value, + Err(e) => { + err_handler(e); + #[allow(clippy::exit)] // Expected behavior + exit(1) + } + } + } + + /// Call the error handler and exit the process with the given code if the + /// result is an error. 
+ /// + /// # Arguments + /// * `self` - The result to check + /// * `code` - The exit code to use if the result is an error + /// * `err_handler` - The error handler to call if the result is an error + /// + /// # Returns + /// The value if the result is Ok + fn exit_with_code_if_error(self, code: i32, err_handler: F) -> T { + match self { + Ok(value) => value, + Err(e) => { + err_handler(e); + #[allow(clippy::exit)] // Expected behavior + exit(code) + } + } + } +} diff --git a/src/main.rs b/src/main.rs index 96a189ee..0aa727e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ use crate::gen_client::command_gen_client; use crate::resolve::command_resolve; mod cli; +pub(crate) mod error; #[cfg(feature = "experimental")] mod gen_client; #[cfg(feature = "experimental")] diff --git a/src/registry/check.rs b/src/registry/check.rs index 3fb18b62..e10c2efc 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -2,13 +2,12 @@ //! Check a semantic convention registry. -use crate::registry::{semconv_registry_path_from, RegistryPath}; +use crate::registry::{check_policies, load_semconv_specs, resolve_semconv_specs, RegistryPath}; use clap::Args; +use std::path::PathBuf; use weaver_cache::Cache; use weaver_logger::Logger; -use weaver_resolver::attribute::AttributeCatalog; -use weaver_resolver::registry::resolve_semconv_registry; -use weaver_resolver::SchemaResolver; +use weaver_semconv::SemConvRegistry; /// Parameters for the `registry check` sub-command #[derive(Debug, Args)] @@ -25,37 +24,35 @@ pub struct RegistryCheckArgs { /// registry is located #[arg(short = 'd', long, default_value = "model")] pub registry_git_sub_dir: Option, + + /// Optional list of policy files to check against the files of the semantic + /// convention registry before the resolution process. + #[arg(short = 'b', long)] + pub before_resolution_policies: Vec, } /// Check a semantic convention registry. 
#[cfg(not(tarpaulin_include))] -pub(crate) fn command(log: impl Logger + Sync + Clone, cache: &Cache, args: &RegistryCheckArgs) { - log.loading(&format!("Checking registry `{}`", args.registry)); +pub(crate) fn command(logger: impl Logger + Sync + Clone, cache: &Cache, args: &RegistryCheckArgs) { + logger.loading(&format!("Checking registry `{}`", args.registry)); let registry_id = "default"; // Load the semantic convention registry into a local cache. // No parsing errors should be observed. - let semconv_specs = SchemaResolver::load_semconv_registry( - registry_id, - semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir), + let semconv_specs = load_semconv_specs( + &args.registry, + &args.registry_git_sub_dir, cache, - log.clone(), - ) - .unwrap_or_else(|e| { - panic!("Failed to load and parse the semantic convention registry, error: {e}"); - }); - - // Resolve the semantic convention registry. - let mut attr_catalog = AttributeCatalog::default(); - let registry_path = args.registry.to_string(); - let _ = resolve_semconv_registry(&mut attr_catalog, &registry_path, &semconv_specs) - .unwrap_or_else(|e| { - panic!("Failed to resolve the semantic convention registry.\n{e}"); - }); - - log.success(&format!( - "Registry `{}` checked successfully", - args.registry - )); + logger.clone(), + ); + + check_policies( + &args.before_resolution_policies, + &semconv_specs, + logger.clone(), + ); + + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + _ = resolve_semconv_specs(&mut registry, logger); } diff --git a/src/registry/generate.rs b/src/registry/generate.rs index 933c789e..2b216637 100644 --- a/src/registry/generate.rs +++ b/src/registry/generate.rs @@ -2,16 +2,18 @@ //! Generate artifacts for a semantic convention registry. 
-use clap::Args; use std::path::PathBuf; -use crate::registry::{semconv_registry_path_from, RegistryPath}; +use clap::Args; + use weaver_cache::Cache; -use weaver_forge::debug::print_dedup_errors; use weaver_forge::registry::TemplateRegistry; use weaver_forge::{GeneratorConfig, TemplateEngine}; use weaver_logger::Logger; -use weaver_resolver::SchemaResolver; +use weaver_semconv::SemConvRegistry; + +use crate::error::ExitIfError; +use crate::registry::{check_policies, load_semconv_specs, resolve_semconv_specs, RegistryPath}; /// Parameters for the `registry generate` sub-command #[derive(Debug, Args)] @@ -41,6 +43,11 @@ pub struct RegistryGenerateArgs { /// registry is located #[arg(short = 'd', long, default_value = "model")] pub registry_git_sub_dir: Option, + + /// Optional list of policy files to check against the files of the semantic + /// convention registry before the resolution process. + #[arg(short = 'b', long)] + pub before_resolution_policies: Vec, } /// Generate artifacts from a semantic convention registry. @@ -58,26 +65,28 @@ pub(crate) fn command( let registry_id = "default"; // Load the semantic convention registry into a local cache. - let mut registry = SchemaResolver::load_semconv_registry( - registry_id, - semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir), + let semconv_specs = load_semconv_specs( + &args.registry, + &args.registry_git_sub_dir, cache, logger.clone(), - ) - .unwrap_or_else(|e| { - panic!("Failed to load and parse the semantic convention registry, error: {e}"); - }); - - // Resolve the semantic convention registry. 
- let schema = - SchemaResolver::resolve_semantic_convention_registry(&mut registry, logger.clone()) - .expect("Failed to resolve registry"); + ); + check_policies( + &args.before_resolution_policies, + &semconv_specs, + logger.clone(), + ); + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let schema = resolve_semconv_specs(&mut registry, logger.clone()); let engine = TemplateEngine::try_new( &format!("registry/{}", args.target), GeneratorConfig::default(), ) - .expect("Failed to create template engine"); + .exit_if_error(|e| { + logger.error("Failed to create the template engine"); + logger.error(&e.to_string()); + }); let template_registry = TemplateRegistry::try_from_resolved_registry( schema @@ -85,19 +94,16 @@ pub(crate) fn command( .expect("Failed to get the registry from the resolved schema"), schema.catalog(), ) - .unwrap_or_else(|e| { - panic!( - "Failed to create the context for the template evaluation: {:?}", - e - ) + .exit_if_error(|e| { + logger.error("Failed to create the registry without catalog"); + logger.error(&e.to_string()); }); - match engine.generate(logger.clone(), &template_registry, args.output.as_path()) { - Ok(_) => logger.success("Artifacts generated successfully"), - Err(e) => { - print_dedup_errors(logger.clone(), e); - #[allow(clippy::exit)] // Expected behavior - std::process::exit(1); - } - }; + engine + .generate(logger.clone(), &template_registry, args.output.as_path()) + .exit_if_error(|e| { + logger.error(&e.to_string()); + }); + + logger.success("Artifacts generated successfully"); } diff --git a/src/registry/mod.rs b/src/registry/mod.rs index be69d37c..43341938 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -3,12 +3,19 @@ //! Commands to manage a semantic convention registry. 
use clap::{Args, Subcommand}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use std::fmt::Display; use std::str::FromStr; +use crate::error::ExitIfError; use check::RegistryCheckArgs; +use std::path::PathBuf; use weaver_cache::Cache; +use weaver_checker::Engine; use weaver_logger::Logger; +use weaver_resolved_schema::ResolvedTelemetrySchema; +use weaver_resolver::{handle_errors, Error, SchemaResolver}; +use weaver_semconv::{SemConvRegistry, SemConvSpec}; use crate::registry::generate::RegistryGenerateArgs; use crate::registry::resolve::RegistryResolveArgs; @@ -122,7 +129,8 @@ pub fn semconv_registry(log: impl Logger + Sync + Clone, command: &RegistryComma } /// Convert a `RegistryPath` to a `weaver_semconv::path::RegistryPath`. -pub fn semconv_registry_path_from( +#[cfg(not(tarpaulin_include))] +pub(crate) fn semconv_registry_path_from( registry: &RegistryPath, path: &Option, ) -> weaver_semconv::path::RegistryPath { @@ -136,3 +144,127 @@ pub fn semconv_registry_path_from( }, } } + +/// Load the semantic convention specifications from a registry path. +/// +/// # Arguments +/// +/// * `registry`, `path` - The semantic convention registry and the optional git sub-directory where it is located. +/// * `cache` - The cache to use for loading the registry. +/// * `log` - The logger to use for logging messages. +#[cfg(not(tarpaulin_include))] +pub(crate) fn load_semconv_specs( + registry: &RegistryPath, + path: &Option, + cache: &Cache, + log: impl Logger + Sync + Clone, +) -> Vec<(String, SemConvSpec)> { + let registry_path = semconv_registry_path_from(registry, path); + let semconv_specs = + SchemaResolver::load_semconv_specs(&registry_path, cache).exit_if_error(|e| { + e.log(log.clone()); + }); + log.success(&format!( + "SemConv registry loaded ({} files)", + semconv_specs.len() + )); + semconv_specs +} + +/// Check the policies of a semantic convention registry. +/// +/// # Arguments +/// +/// * `policy_engine` - The pre-configured policy engine to use for checking the policies. 
+/// * `semconv_specs` - The semantic convention specifications to check. +#[cfg(not(tarpaulin_include))] +pub fn check_policy( + policy_engine: &Engine, + semconv_specs: &[(String, SemConvSpec)], +) -> Result<(), Error> { + // Check policies in parallel + let policy_errors = semconv_specs + .par_iter() + .flat_map(|(path, semconv)| { + // Create a local policy engine inheriting the policies + // from the global policy engine + let mut policy_engine = policy_engine.clone(); + let mut errors = vec![]; + + match policy_engine.set_input(semconv) { + Ok(_) => match policy_engine.check() { + Ok(violations) => { + for violation in violations { + errors.push(Error::PolicyViolation { + provenance: path.clone(), + violation, + }); + } + } + Err(e) => errors.push(Error::SemConvError { + message: format!("Invalid policy evaluation for file '{path}': {e}"), + }), + }, + Err(e) => errors.push(Error::SemConvError { + message: format!("Invalid policy engine input for file '{path}': {e}"), + }), + } + errors + }) + .collect::>(); + + handle_errors(policy_errors)?; + Ok(()) +} + +/// Check the policies of a semantic convention registry. +/// +/// # Arguments +/// +/// * `before_resolution_policies` - The list of policy files to check before the resolution process. +/// * `semconv_specs` - The semantic convention specifications to check. +/// * `logger` - The logger to use for logging messages. 
+#[cfg(not(tarpaulin_include))] +fn check_policies( + before_resolution_policies: &[PathBuf], + semconv_specs: &[(String, SemConvSpec)], + logger: impl Logger + Sync + Clone, +) { + if !before_resolution_policies.is_empty() { + let mut engine = Engine::new(); + for policy in before_resolution_policies { + engine.add_policy(policy).exit_if_error(|e| { + logger.error(&format!( + "Failed to load policy file `{}`, error: {e}", + policy.display() + )); + }); + } + check_policy(&engine, semconv_specs).exit_if_error(|e| { + e.log(logger.clone()); + }); + logger.success("Policies checked"); + } +} + +/// Resolve the semantic convention specifications and return the resolved schema. +/// +/// # Arguments +/// +/// * `registry` - The semantic convention registry to resolve; it is passed +/// mutably to the schema resolver. +/// * `logger` - The logger to use for logging messages. +#[cfg(not(tarpaulin_include))] +pub(crate) fn resolve_semconv_specs( + registry: &mut SemConvRegistry, + logger: impl Logger + Sync + Clone, +) -> ResolvedTelemetrySchema { + let resolved_schema = SchemaResolver::resolve_semantic_convention_registry(registry) + .exit_if_error(|e| { + logger.error("Failed to resolve the semantic convention registry"); + e.log(logger.clone()); + }); + + logger.success("SemConv registry resolved"); + resolved_schema +} diff --git a/src/registry/resolve.rs b/src/registry/resolve.rs index 741c1c72..3f626503 100644 --- a/src/registry/resolve.rs +++ b/src/registry/resolve.rs @@ -10,9 +10,9 @@ use serde::Serialize; use weaver_cache::Cache; use weaver_forge::registry::TemplateRegistry; use weaver_logger::Logger; -use weaver_resolver::SchemaResolver; +use weaver_semconv::SemConvRegistry; -use crate::registry::{semconv_registry_path_from, RegistryArgs}; +use crate::registry::{load_semconv_specs, resolve_semconv_specs, RegistryArgs}; /// Supported output formats for the resolved schema #[derive(Debug, Clone, ValueEnum)] @@ -66,20 +66,14 @@ 
pub(crate) fn command( let registry_id = "default"; // Load the semantic convention registry into a local cache. - let mut registry = SchemaResolver::load_semconv_registry( - registry_id, - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir), + let semconv_specs = load_semconv_specs( + &args.registry.registry, + &args.registry.registry_git_sub_dir, cache, logger.clone(), - ) - .unwrap_or_else(|e| { - panic!("Failed to load and parse the semantic convention registry, error: {e}"); - }); - - // Resolve the semantic convention registry. - let schema = - SchemaResolver::resolve_semantic_convention_registry(&mut registry, logger.clone()) - .expect("Failed to resolve registry"); + ); + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let schema = resolve_semconv_specs(&mut registry, logger.clone()); // Serialize the resolved schema and write it // to a file or print it to stdout. diff --git a/src/registry/stats.rs b/src/registry/stats.rs index d2401f6c..4ef85070 100644 --- a/src/registry/stats.rs +++ b/src/registry/stats.rs @@ -2,13 +2,12 @@ //! Compute stats on a semantic convention registry. -use crate::registry::{semconv_registry_path_from, RegistryArgs}; +use crate::registry::{load_semconv_specs, resolve_semconv_specs, RegistryArgs}; use clap::Args; use weaver_cache::Cache; use weaver_logger::Logger; use weaver_resolved_schema::registry::{CommonGroupStats, GroupStats}; use weaver_resolved_schema::ResolvedTelemetrySchema; -use weaver_resolver::SchemaResolver; use weaver_semconv::group::GroupType; use weaver_semconv::SemConvRegistry; @@ -22,8 +21,8 @@ pub struct RegistryStatsArgs { /// Compute stats on a semantic convention registry. 
#[cfg(not(tarpaulin_include))] -pub(crate) fn command(log: impl Logger + Sync + Clone, cache: &Cache, args: &RegistryStatsArgs) { - log.loading(&format!( +pub(crate) fn command(logger: impl Logger + Sync + Clone, cache: &Cache, args: &RegistryStatsArgs) { + logger.loading(&format!( "Compute statistics on the registry `{}`", args.registry.registry )); @@ -31,23 +30,20 @@ pub(crate) fn command(log: impl Logger + Sync + Clone, cache: &Cache, args: &Reg let registry_id = "default"; // Load the semantic convention registry into a local cache. - let mut registry = SchemaResolver::load_semconv_registry( - registry_id, - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir), + let semconv_specs = load_semconv_specs( + &args.registry.registry, + &args.registry.registry_git_sub_dir, cache, - log.clone(), - ) - .unwrap_or_else(|e| { - panic!("Failed to load and parse the semantic convention registry, error: {e}"); - }); + logger.clone(), + ); + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); display_semconv_registry_stats(&registry); // Resolve the semantic convention registry. 
- let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry, log.clone()) - .expect("Failed to resolve registry"); + let resolved_schema = resolve_semconv_specs(&mut registry, logger); - display_schema_stats(&schema); + display_schema_stats(&resolved_schema); } #[cfg(not(tarpaulin_include))] @@ -125,11 +121,13 @@ fn display_schema_stats(schema: &ResolvedTelemetrySchema) { let catalog_stats = &stats.catalog_stats; println!("Shared Catalog (after resolution and deduplication):"); - println!( - " - Number of deduplicated attributes: {} ({}%)", - catalog_stats.attribute_count, - catalog_stats.attribute_count * 100 / total_number_of_attributes - ); + if total_number_of_attributes > 0 { + println!( + " - Number of deduplicated attributes: {} ({}%)", + catalog_stats.attribute_count, + catalog_stats.attribute_count * 100 / total_number_of_attributes + ); + } println!(" - Attribute types breakdown:"); for (attribute_type, count) in catalog_stats.attribute_type_breakdown.iter() { println!(" - {}: {}", attribute_type, count); diff --git a/src/registry/update_markdown.rs b/src/registry/update_markdown.rs index 36d08b5e..80ab6f6a 100644 --- a/src/registry/update_markdown.rs +++ b/src/registry/update_markdown.rs @@ -3,6 +3,7 @@ //! Update markdown files that contain markers indicating the templates used to //! update the specified sections. 
+use crate::error::ExitIfError; use crate::registry::{semconv_registry_path_from, RegistryPath}; use clap::Args; use weaver_cache::Cache; @@ -53,10 +54,10 @@ pub(crate) fn command( let registry = ResolvedSemconvRegistry::try_from_url( semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir), cache, - log.clone(), ) - .unwrap_or_else(|e| { - panic!("Failed to resolve the semantic convention registry.\n{e}"); + .exit_if_error(|e| { + log.error("Failed to resolve the semantic convention registry"); + log.error(&e.to_string()); }); log.success("Registry resolved successfully"); let operation = if args.dry_run { diff --git a/src/resolve.rs b/src/resolve.rs index 998cb95a..4cd5ee9a 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; use std::process::exit; use weaver_cache::Cache; +use crate::error::ExitIfError; use crate::registry::RegistryPath; use weaver_logger::Logger; use weaver_resolver::SchemaResolver; @@ -67,14 +68,18 @@ pub fn command_resolve(log: impl Logger + Sync + Clone, command: &ResolveCommand match command.command { ResolveSubCommand::Registry(ref command) => { let registry_id = "default"; + let registry_path = weaver_semconv::path::RegistryPath::GitUrl { + git_url: command.registry.clone(), + path: command.path.clone(), + }; + let semconv_specs = SchemaResolver::load_semconv_specs(&registry_path, &cache) + .exit_if_error(|e| { + e.log(log.clone()); + }); let mut registry = SchemaResolver::semconv_registry_from_imports( registry_id, - &[weaver_semconv::path::RegistryPath::GitUrl { - git_url: command.registry.clone(), - path: command.path.clone(), - }], + semconv_specs, ResolverConfig::with_keep_specs(), - &cache, log.clone(), ) .unwrap_or_else(|e| { @@ -82,12 +87,13 @@ pub fn command_resolve(log: impl Logger + Sync + Clone, command: &ResolveCommand exit(1); }); - let resolved_schema = - SchemaResolver::resolve_semantic_convention_registry(&mut registry, log.clone()) - .unwrap_or_else(|e| { - 
log.error(&e.to_string()); - exit(1); - }); + let resolved_schema = SchemaResolver::resolve_semantic_convention_registry( + &mut registry, + ) + .unwrap_or_else(|e| { + log.error(&e.to_string()); + exit(1); + }); match serde_yaml::to_string(&resolved_schema) { Ok(yaml) => { if let Some(output) = &command.output { diff --git a/src/search/mod.rs b/src/search/mod.rs index 00d8c035..8d89b447 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -27,6 +27,7 @@ use tantivy::schema::{Field, Schema, STORED, TEXT}; use tantivy::{Index, IndexWriter, ReloadPolicy}; use tui_textarea::TextArea; +use crate::error::ExitIfError; use crate::registry::{semconv_registry_path_from, RegistryPath}; use theme::ThemeConfig; use weaver_cache::Cache; @@ -36,6 +37,7 @@ use weaver_resolver::registry::resolve_semconv_registry; use weaver_resolver::SchemaResolver; use weaver_schema::attribute::Attribute; use weaver_schema::TelemetrySchema; +use weaver_semconv::SemConvRegistry; use crate::search::schema::{attribute, metric, metric_group, resource, span}; @@ -218,24 +220,22 @@ fn search_registry_command2( registry_args: &SearchRegistry2, ) { let registry_id = "default"; - let semconv_specs = SchemaResolver::load_semconv_registry( - registry_id, - semconv_registry_path_from(&registry_args.registry, &registry_args.path), - cache, - log.clone(), - ) - .unwrap_or_else(|e| { - log.error(&format!("{}", e)); - std::process::exit(1); - }); + let registry_path = semconv_registry_path_from(&registry_args.registry, &registry_args.path); + let semconv_specs = + SchemaResolver::load_semconv_specs(&registry_path, cache).exit_if_error(|e| { + e.log(log.clone()); + }); + let semconv_specs = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let mut attr_catalog = AttributeCatalog::default(); + let registry_path = registry_args.registry.to_string(); let resolved_registry = - resolve_semconv_registry(&mut attr_catalog, &registry_args.registry, &semconv_specs) - .unwrap_or_else(|e| { + resolve_semconv_registry(&mut 
attr_catalog, &registry_path, &semconv_specs).unwrap_or_else( + |e| { log.error(&format!("{}", e)); std::process::exit(1); - }); + }, + ); dbg!(resolved_registry); //dbg!(attr_catalog); diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index e34397eb..9b7bcf84 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -8,6 +8,7 @@ use weaver_resolver::attribute::AttributeCatalog; use weaver_resolver::registry::resolve_semconv_registry; use weaver_resolver::SchemaResolver; use weaver_semconv::path::RegistryPath; +use weaver_semconv::SemConvRegistry; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -32,18 +33,17 @@ fn test_semconv_registry_resolution() { // Load the official semantic convention registry into a local cache. // No parsing errors should be observed. - let semconv_specs = SchemaResolver::load_semconv_registry( - registry_id, - RegistryPath::GitUrl { - git_url: SEMCONV_REGISTRY_URL.to_owned(), - path: Some(SEMCONV_REGISTRY_MODEL.to_owned()), - }, - &cache, - log.clone(), - ) - .unwrap_or_else(|e| { - panic!("Failed to load and parse the official semantic convention registry, error: {e}"); - }); + let registry_path = RegistryPath::GitUrl { + git_url: SEMCONV_REGISTRY_URL.to_owned(), + path: Some(SEMCONV_REGISTRY_MODEL.to_owned()), + }; + let semconv_specs = SchemaResolver::load_semconv_specs(&registry_path, &cache) + .inspect_err(|e| { + log.error("Failed to load the semantic convention registry"); + log.error(&e.to_string()); + }) + .unwrap(); + let semconv_specs = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); // Check if the logger has reported any warnings or errors. assert_eq!(log.warn_count(), 0);