diff --git a/.github/actions/spelling/patterns.txt b/.github/actions/spelling/patterns.txt index 099638859774d..db0c21c4feb10 100644 --- a/.github/actions/spelling/patterns.txt +++ b/.github/actions/spelling/patterns.txt @@ -226,7 +226,7 @@ user:P@ssw0rd /.+@base64/.+ # Ignore base64 encoded values in VRL examples (requires padding to avoid false positives) -"[A-Za-z0-9]*==" +"[A-Za-z0-9+\/]*==" # ignore uuid_from_friendly_id argument uuid_from_friendly_id!\(".*"\) diff --git a/Cargo.lock b/Cargo.lock index 19a9f2eed20d4..b460bdc6f7837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3096,6 +3096,25 @@ dependencies = [ "snafu 0.7.5", ] +[[package]] +name = "dnstap-parser" +version = "0.1.0" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes 1.9.0", + "chrono", + "chrono-tz", + "dnsmsg-parser", + "hickory-proto", + "prost 0.12.6", + "prost-build 0.12.6", + "snafu 0.7.5", + "tracing 0.1.41", + "vector-lib", + "vrl", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -4309,7 +4328,6 @@ dependencies = [ "rand 0.8.5", "thiserror 1.0.68", "tinyvec", - "tokio", "tracing 0.1.41", "url", ] @@ -10824,6 +10842,7 @@ dependencies = [ "derivative", "dirs-next", "dnsmsg-parser", + "dnstap-parser", "dyn-clone", "encoding_rs", "enum_dispatch", @@ -11300,6 +11319,7 @@ dependencies = [ "chrono", "chrono-tz", "clap", + "dnstap-parser", "enrichment", "glob", "prettydiff", @@ -11317,6 +11337,7 @@ name = "vector-vrl-web-playground" version = "0.1.0" dependencies = [ "cargo-lock", + "dnstap-parser", "enrichment", "getrandom 0.2.15", "gloo-utils", diff --git a/Cargo.toml b/Cargo.toml index 5a7a68bdf7270..000df386a2180 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,7 @@ members = [ ".", "lib/codecs", "lib/dnsmsg-parser", + "lib/dnstap-parser", "lib/docs-renderer", "lib/enrichment", "lib/fakedata", @@ -133,6 +134,7 @@ members = [ ] [workspace.dependencies] +anyhow = "1.0.94" cfg-if = { version = "1.0.0", default-features = false } chrono = { version = "0.4.38", default-features = false, features = ["clock", "serde"] } chrono-tz = { version = "0.10.0", default-features = false, features = ["serde"] } @@ -140,6 +142,7 @@ clap = { version = "4.5.22", default-features = false, features = ["derive", "er flate2 = { version = "1.0.35", default-features = false, features = ["default"] } futures = { version = "0.3.31", default-features = false, features = ["compat", "io-compat", "std"], package = "futures" } glob = { version = "0.3.1", default-features = false } +hickory-proto = { version = "0.24.1", default-features = false, features = ["dnssec"] } indexmap = { version = "2.7.0", default-features = false, features = ["serde", "std"] } metrics = "0.24.1" metrics-tracing-context = { version = "0.17.0", default-features = false } @@ -177,6 +180,7 @@ snafu.workspace = true # Internal libs dnsmsg-parser = { path = "lib/dnsmsg-parser", optional = true } +dnstap-parser = { path = "lib/dnstap-parser", optional = true } fakedata = { path = "lib/fakedata", optional = true } portpicker = { path = "lib/portpicker" } tracing-limit = { path = "lib/tracing-limit" } @@ -368,7 +372,7 @@ tokio-postgres = { version = "0.7.12", default-features = false, features = ["ru tokio-tungstenite = { version = "0.20.1", default-features = false, features = ["connect"], optional = true } toml.workspace = true tonic = { workspace = true, optional = true } -hickory-proto = { version = "0.24.1", default-features = false, features = ["dnssec"], optional = true } +hickory-proto = { workspace = true, optional = true } typetag = { version = "0.2.18", default-features = false } url = { version = "2.5.4", default-features = false, features = ["serde"] } warp = { version = "0.3.7", default-features = false } @@ -584,7 +588,7 @@ sources-aws_s3 = ["aws-core", "dep:aws-sdk-sqs", "dep:aws-sdk-s3", "dep:semver", sources-aws_sqs = ["aws-core", "dep:aws-sdk-sqs"] sources-datadog_agent = ["sources-utils-http-error", "protobuf-build", "dep:prost"] sources-demo_logs = ["dep:fakedata"] -sources-dnstap = ["sources-utils-net-tcp", "dep:base64", "dep:hickory-proto", "dep:dnsmsg-parser", "protobuf-build", "dep:prost"] +sources-dnstap = ["sources-utils-net-tcp", "dep:base64", "dep:hickory-proto", "dep:dnsmsg-parser", "dep:dnstap-parser", "protobuf-build", "dep:prost"] sources-docker_logs = ["docker"] sources-eventstoredb_metrics = [] sources-exec = [] diff --git a/buf.yaml b/buf.yaml index 9b6755725807f..892768db97363 100644 --- a/buf.yaml +++ b/buf.yaml @@ -3,6 +3,7 @@ modules: - path: lib/vector-core/proto - path: proto/third-party - path: proto/vector + - path: lib/dnstap-parser/proto lint: use: - DEFAULT diff --git a/build.rs b/build.rs index 5419a99b1b4d5..b603074f90bcc 100644 --- a/build.rs +++ b/build.rs @@ -116,7 +116,6 @@ fn main() { #[cfg(feature = "protobuf-build")] { - println!("cargo:rerun-if-changed=proto/third-party/dnstap.proto"); println!("cargo:rerun-if-changed=proto/third-party/google/pubsub/v1/pubsub.proto"); println!("cargo:rerun-if-changed=proto/third-party/google/rpc/status.proto"); println!("cargo:rerun-if-changed=proto/vector/dd_metric.proto"); @@ -144,7 +143,6 @@ fn main() { prost_build, &[ "lib/vector-core/proto/event.proto", - "proto/third-party/dnstap.proto", "proto/vector/ddsketch_full.proto", "proto/vector/dd_metric.proto", "proto/vector/dd_trace.proto", diff --git a/changelog.d/21985_parse_dnstap.feature.md b/changelog.d/21985_parse_dnstap.feature.md new file mode 100644 index 0000000000000..f61caf1b2ec3f --- /dev/null +++ b/changelog.d/21985_parse_dnstap.feature.md @@ -0,0 +1,3 @@ +Add VRL function `parse_dnstap` that can parse dnstap data and produce output in the same format as `dnstap` source. + +authors: esensar diff --git a/lib/dnsmsg-parser/Cargo.toml b/lib/dnsmsg-parser/Cargo.toml index 56f6946d038c9..ceab56f0246b3 100644 --- a/lib/dnsmsg-parser/Cargo.toml +++ b/lib/dnsmsg-parser/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" [dependencies] data-encoding = "2.6" -hickory-proto = { version = "0.24", features = ["dnssec"] } +hickory-proto.workspace = true snafu.workspace = true [dev-dependencies] diff --git a/lib/dnstap-parser/Cargo.toml b/lib/dnstap-parser/Cargo.toml new file mode 100644 index 0000000000000..db86200cb130e --- /dev/null +++ b/lib/dnstap-parser/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "dnstap-parser" +version = "0.1.0" +authors = ["Vector Contributors "] +edition = "2021" +publish = false +license = "MIT" + +[dependencies] +base64 = { version = "0.22.1", default-features = false } +bytes = { version = "1.9.0", default-features = false, features = ["serde"] } +chrono.workspace = true +dnsmsg-parser = { path = "../dnsmsg-parser" } +hickory-proto.workspace = true +prost.workspace = true +snafu.workspace = true +tracing = { version = "0.1.34", default-features = false } +vector-lib = { path = "../vector-lib" } +vrl.workspace = true + +[build-dependencies] +prost-build.workspace = true + +[dev-dependencies] +anyhow.workspace = true +chrono-tz.workspace = true diff --git a/lib/dnstap-parser/build.rs b/lib/dnstap-parser/build.rs new file mode 100644 index 0000000000000..01880ac2bde38 --- /dev/null +++ b/lib/dnstap-parser/build.rs @@ -0,0 +1,8 @@ +fn main() { + println!("cargo:rerun-if-changed=proto/dnstap.proto"); + let mut prost_build = prost_build::Config::new(); + prost_build.btree_map(["."]); + prost_build + .compile_protos(&["proto/dnstap.proto"], &["proto"]) + .expect("Failed to compile proto files"); +} diff --git a/proto/third-party/dnstap.proto b/lib/dnstap-parser/proto/dnstap.proto similarity index 100% rename from proto/third-party/dnstap.proto rename to lib/dnstap-parser/proto/dnstap.proto diff --git a/lib/dnstap-parser/src/internal_events.rs b/lib/dnstap-parser/src/internal_events.rs new file mode 100644 index 0000000000000..ab1635c9b3e03 --- /dev/null +++ b/lib/dnstap-parser/src/internal_events.rs @@ -0,0 +1,20 @@ +use tracing::warn; +use vector_lib::internal_event::InternalEvent; +use vector_lib::internal_event::{error_stage, error_type}; + +#[derive(Debug)] +pub(crate) struct DnstapParseWarning { + pub error: E, +} + +impl InternalEvent for DnstapParseWarning { + fn emit(self) { + warn!( + message = "Recoverable error occurred while parsing dnstap data.", + error = %self.error, + stage = error_stage::PROCESSING, + error_type = error_type::PARSER_FAILED, + internal_log_rate_limit = true, + ); + } +} diff --git a/lib/dnstap-parser/src/lib.rs b/lib/dnstap-parser/src/lib.rs new file mode 100644 index 0000000000000..d8c1744e08137 --- /dev/null +++ b/lib/dnstap-parser/src/lib.rs @@ -0,0 +1,12 @@ +#![deny(warnings)] + +use vrl::compiler::Function; + +mod internal_events; +pub mod parser; +pub mod schema; +mod vrl_functions; + +pub fn vrl_functions() -> Vec> { + vrl_functions::all() +} diff --git a/src/sources/dnstap/parser.rs b/lib/dnstap-parser/src/parser.rs similarity index 99% rename from src/sources/dnstap/parser.rs rename to lib/dnstap-parser/src/parser.rs index 117f41cde6edc..6a950c1a152b7 100644 --- a/src/sources/dnstap/parser.rs +++ b/lib/dnstap-parser/src/parser.rs @@ -5,6 +5,7 @@ use std::{ net::{IpAddr, Ipv4Addr, Ipv6Addr}, sync::LazyLock, }; +use vector_lib::emit; use base64::prelude::{Engine as _, BASE64_STANDARD}; use bytes::Bytes; @@ -18,9 +19,8 @@ use prost::Message; use snafu::Snafu; use vrl::{owned_value_path, path}; -use crate::{ +use vector_lib::{ event::{LogEvent, Value}, - internal_events::DnstapParseWarning, Error, Result, }; @@ -29,7 +29,7 @@ mod dnstap_proto { include!(concat!(env!("OUT_DIR"), "/dnstap.rs")); } -use crate::sources::dnstap::schema::DNSTAP_VALUE_PATHS; +use crate::{internal_events::DnstapParseWarning, schema::DNSTAP_VALUE_PATHS}; use dnstap_proto::{ message::Type as DnstapMessageType, Dnstap, Message as DnstapMessage, SocketFamily, SocketProtocol, @@ -38,7 +38,7 @@ use vector_lib::config::log_schema; use vector_lib::lookup::lookup_v2::ValuePath; use vector_lib::lookup::PathPrefix; -use super::{ +use dnsmsg_parser::{ dns_message::{ DnsRecord, EdnsOptionEntry, OptPseudoSection, QueryHeader, QueryQuestion, UpdateHeader, ZoneInfo, @@ -459,14 +459,15 @@ impl DnstapParser { ); } - Ok(if let Some(response_port) = dnstap_message.response_port { + if let Some(response_port) = dnstap_message.response_port { DnstapParser::insert( event, prefix.clone(), &DNSTAP_VALUE_PATHS.response_port, response_port, ); - }) + }; + Ok(()) } fn log_time<'a>( @@ -1015,7 +1016,6 @@ fn to_dnstap_message_type(type_id: i32) -> String { #[cfg(test)] mod tests { use super::*; - use crate::event::Value; use chrono::DateTime; use dnsmsg_parser::dns_message_parser::DnsParserOptions; use std::collections::BTreeMap; diff --git a/src/sources/dnstap/schema.rs b/lib/dnstap-parser/src/schema.rs similarity index 99% rename from src/sources/dnstap/schema.rs rename to lib/dnstap-parser/src/schema.rs index 01d1446c09ad4..3f90977fd500f 100644 --- a/src/sources/dnstap/schema.rs +++ b/lib/dnstap-parser/src/schema.rs @@ -12,7 +12,7 @@ pub struct DnstapEventSchema; impl DnstapEventSchema { /// The message schema for the request and response message fields - fn request_message_schema_definition(&self) -> Collection { + pub(crate) fn request_message_schema_definition(&self) -> Collection { let mut result: BTreeMap = BTreeMap::new(); result.insert( DNSTAP_VALUE_PATHS.time.to_string().into(), @@ -288,7 +288,7 @@ pub struct DnstapPaths { } /// Lazily initialized singleton. -pub(crate) static DNSTAP_VALUE_PATHS: LazyLock = LazyLock::new(|| DnstapPaths { +pub static DNSTAP_VALUE_PATHS: LazyLock = LazyLock::new(|| DnstapPaths { server_identity: owned_value_path!("serverId"), server_version: owned_value_path!("serverVersion"), extra: owned_value_path!("extraInfo"), diff --git a/lib/dnstap-parser/src/vrl_functions/mod.rs b/lib/dnstap-parser/src/vrl_functions/mod.rs new file mode 100644 index 0000000000000..ef6d13f403ebf --- /dev/null +++ b/lib/dnstap-parser/src/vrl_functions/mod.rs @@ -0,0 +1,7 @@ +use vrl::compiler::Function; + +pub mod parse_dnstap; + +pub fn all() -> Vec> { + vec![Box::new(parse_dnstap::ParseDnstap) as _] +} diff --git a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs new file mode 100644 index 0000000000000..ee6c5d52bc8b8 --- /dev/null +++ b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs @@ -0,0 +1,399 @@ +use crate::parser::DnstapParser; +use crate::schema::DnstapEventSchema; +use base64::prelude::{Engine as _, BASE64_STANDARD}; +use dnsmsg_parser::dns_message_parser::DnsParserOptions; +use vector_lib::event::LogEvent; +use vrl::prelude::*; + +#[derive(Clone, Copy, Debug)] +pub struct ParseDnstap; + +impl Function for ParseDnstap { + fn identifier(&self) -> &'static str { + "parse_dnstap" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "lowercase_hostnames", + kind: kind::BOOLEAN, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "Parse dnstap query message", + source: r#"parse_dnstap!("ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zGgBy5wEIAxACGAEiEAAAAAAAAAAAAAAAAAAAAAAqECABBQJwlAAAAAAAAAAAADAw8+0CODVA7+zq9wVNMU3WNlI2kwIAAAABAAAAAAABCWZhY2Vib29rMQNjb20AAAEAAQAAKQIAAACAAAAMAAoACOxjCAG9zVgzWgUDY29tAGAAbQAAAAByZLM4AAAAAQAAAAAAAQJoNQdleGFtcGxlA2NvbQAABgABAAApBNABAUAAADkADwA1AAlubyBTRVAgbWF0Y2hpbmcgdGhlIERTIGZvdW5kIGZvciBkbnNzZWMtZmFpbGVkLm9yZy54AQ==")"#, + result: Ok(indoc!( + r#"{ + "dataType": "Message", + "dataTypeId": 1, + "extraInfo": "", + "messageType": "ResolverQuery", + "messageTypeId": 3, + "queryZone": "com.", + "requestData": { + "fullRcode": 0, + "header": { + "aa": false, + "ad": false, + "anCount": 0, + "arCount": 1, + "cd": false, + "id": 37634, + "nsCount": 0, + "opcode": 0, + "qdCount": 1, + "qr": 0, + "ra": false, + "rcode": 0, + "rd": false, + "tc": false + }, + "opt": { + "do": true, + "ednsVersion": 0, + "extendedRcode": 0, + "options": [ + { + "optCode": 10, + "optName": "Cookie", + "optValue": "7GMIAb3NWDM=" + } + ], + "udpPayloadSize": 512 + }, + "question": [ + { + "class": "IN", + "domainName": "facebook1.com.", + "questionType": "A", + "questionTypeId": 1 + } + ], + "rcodeName": "NoError" + }, + "responseData": { + "fullRcode": 16, + "header": { + "aa": false, + "ad": false, + "anCount": 0, + "arCount": 1, + "cd": false, + "id": 45880, + "nsCount": 0, + "opcode": 0, + "qdCount": 1, + "qr": 0, + "ra": false, + "rcode": 16, + "rd": false, + "tc": false + }, + "opt": { + "do": false, + "ednsVersion": 1, + "extendedRcode": 1, + "ede": [ + { + "extraText": "no SEP matching the DS found for dnssec-failed.org.", + "infoCode": 9, + "purpose": "DNSKEY Missing" + } + ], + "udpPayloadSize": 1232 + }, + "question": [ + { + "class": "IN", + "domainName": "h5.example.com.", + "questionType": "SOA", + "questionTypeId": 6 + } + ], + "rcodeName": "BADSIG" + }, + "responseAddress": "2001:502:7094::30", + "responsePort": 53, + "serverId": "james-Virtual-Machine", + "serverVersion": "BIND 9.16.3", + "socketFamily": "INET6", + "socketProtocol": "UDP", + "sourceAddress": "::", + "sourcePort": 46835, + "time": 1593489007920014129, + "timePrecision": "ns", + "timestamp": "2020-06-30T03:50:07.920014129Z" + }"# + )), + }] + } + + fn compile( + &self, + _state: &TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let lowercase_hostnames = arguments + .optional("lowercase_hostnames") + .unwrap_or_else(|| expr!(false)); + Ok(ParseDnstapFn { + value, + lowercase_hostnames, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct ParseDnstapFn { + value: Box, + lowercase_hostnames: Box, +} + +impl FunctionExpression for ParseDnstapFn { + fn resolve(&self, ctx: &mut Context<'_>) -> Resolved { + let value = self.value.resolve(ctx)?; + let input = value.try_bytes_utf8_lossy()?; + + let mut event = LogEvent::default(); + + DnstapParser::parse( + &mut event, + BASE64_STANDARD + .decode(input.as_bytes()) + .map_err(|_| format!("{input} is not a valid base64 encoded string"))? + .into(), + DnsParserOptions { + lowercase_hostnames: self.lowercase_hostnames.resolve(ctx)?.try_boolean()?, + }, + ) + .map_err(|e| format!("dnstap parsing failed for {input}: {e}"))?; + + Ok(event.value().clone()) + } + + fn type_def(&self, _: &TypeState) -> TypeDef { + TypeDef::object(DnstapEventSchema.request_message_schema_definition()).fallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::{DateTime, TimeZone, Utc}; + use vrl::value; + + test_function![ + parse_dnstap => ParseDnstap; + + query { + args: func_args![value: value!("ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zGgBy5wEIAxACGAEiEAAAAAAAAAAAAAAAAAAAAAAqECABBQJwlAAAAAAAAAAAADAw8+0CODVA7+zq9wVNMU3WNlI2kwIAAAABAAAAAAABCWZhY2Vib29rMQNjb20AAAEAAQAAKQIAAACAAAAMAAoACOxjCAG9zVgzWgUDY29tAGAAbQAAAAByZLM4AAAAAQAAAAAAAQJoNQdleGFtcGxlA2NvbQAABgABAAApBNABAUAAADkADwA1AAlubyBTRVAgbWF0Y2hpbmcgdGhlIERTIGZvdW5kIGZvciBkbnNzZWMtZmFpbGVkLm9yZy54AQ==")], + want: Ok({ + let timestamp = Value::Timestamp( + Utc.from_utc_datetime( + &DateTime::parse_from_rfc3339("2020-06-30T03:50:07.920014129Z") + .unwrap() + .naive_utc(), + ), + ); + value!({ + dataType: "Message", + dataTypeId: 1, + extraInfo: "", + messageType: "ResolverQuery", + messageTypeId: 3, + queryZone: "com.", + requestData: { + fullRcode: 0, + header: { + aa: false, + ad: false, + anCount: 0, + arCount: 1, + cd: false, + id: 37634, + nsCount: 0, + opcode: 0, + qdCount: 1, + qr: 0, + ra: false, + rcode: 0, + rd: false, + tc: false, + }, + opt: { + do: true, + ednsVersion: 0, + extendedRcode: 0, + options: [ + { + optCode: 10, + optName: "Cookie", + optValue: "7GMIAb3NWDM=", + } + ], + udpPayloadSize: 512, + }, + question: [ + { + class: "IN", + domainName: "facebook1.com.", + questionType: "A", + questionTypeId: 1, + } + ], + rcodeName: "NoError", + }, + responseData: { + fullRcode: 16, + header: { + aa: false, + ad: false, + anCount: 0, + arCount: 1, + cd: false, + id: 45880, + nsCount: 0, + opcode: 0, + qdCount: 1, + qr: 0, + ra: false, + rcode: 16, + rd: false, + tc: false, + }, + opt: { + do: false, + ede: [ + { + extraText: "no SEP matching the DS found for dnssec-failed.org.", + infoCode: 9, + purpose: "DNSKEY Missing", + } + ], + ednsVersion: 1, + extendedRcode: 1, + udpPayloadSize: 1232, + }, + question: [ + { + class: "IN", + domainName: "h5.example.com.", + questionType: "SOA", + questionTypeId: 6, + } + ], + rcodeName: "BADSIG", + }, + responseAddress: "2001:502:7094::30", + responsePort: 53, + serverId: "james-Virtual-Machine", + serverVersion: "BIND 9.16.3", + socketFamily: "INET6", + socketProtocol: "UDP", + sourceAddress: "::", + sourcePort: 46835, + time: 1_593_489_007_920_014_129i64, + timePrecision: "ns", + timestamp: timestamp + }) + }), + tdef: TypeDef::object(DnstapEventSchema.request_message_schema_definition()).fallible(), + } + + update { + args: func_args![value: value!("ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zcmsIDhABGAEiBH8AAAEqBH8AAAEwrG44AEC+iu73BU14gfofUh1wi6gAAAEAAAAAAAAHZXhhbXBsZQNjb20AAAYAAWC+iu73BW0agDwvch1wi6gAAAEAAAAAAAAHZXhhbXBsZQNjb20AAAYAAXgB")], + want: Ok({ + let timestamp = Value::Timestamp( + Utc.from_utc_datetime( + &DateTime::parse_from_rfc3339("2020-06-30T18:32:30.792494106Z") + .unwrap() + .naive_utc(), + ), + ); + value!({ + dataType: "Message", + dataTypeId: 1, + messageType: "UpdateResponse", + messageTypeId: 14, + requestData: { + fullRcode: 0, + header: { + adCount: 0, + id: 28811, + opcode: 5, + prCount: 0, + qr: 1, + rcode: 0, + upCount: 0, + zoCount: 1 + }, + zone: { + zClass: "IN", + zName: "example.com.", + zType: "SOA", + zTypeId: 6 + }, + rcodeName: "NoError", + }, + responseAddress: "127.0.0.1", + responseData: { + fullRcode: 0, + header: { + adCount: 0, + id: 28811, + opcode: 5, + prCount: 0, + qr: 1, + rcode: 0, + upCount: 0, + zoCount: 1 + }, + zone: { + zClass: "IN", + zName: "example.com.", + zType: "SOA", + zTypeId: 6 + }, + rcodeName: "NoError", + }, + responsePort: 0, + serverId: "james-Virtual-Machine", + serverVersion: "BIND 9.16.3", + socketFamily: "INET", + socketProtocol: "UDP", + sourceAddress: "127.0.0.1", + sourcePort: 14124, + time: 1_593_541_950_792_494_106i64, + timePrecision: "ns", + timestamp: timestamp + }) + }), + tdef: TypeDef::object(DnstapEventSchema.request_message_schema_definition()).fallible(), + } + + non_base64_value { + args: func_args![value: value!("non base64 string")], + want: Err("non base64 string is not a valid base64 encoded string"), + tdef: TypeDef::object(DnstapEventSchema.request_message_schema_definition()).fallible(), + } + + invalid_dnstap_data { + args: func_args![value: value!("bm9uIGRuc3RhcCBkYXRh")], + want: Err("dnstap parsing failed for bm9uIGRuc3RhcCBkYXRh: failed to decode Protobuf message: invalid wire type value: 6"), + tdef: TypeDef::object(DnstapEventSchema.request_message_schema_definition()).fallible(), + } + ]; +} diff --git a/lib/vector-vrl/tests/Cargo.toml b/lib/vector-vrl/tests/Cargo.toml index 6e7ae90646cc1..52c0cb59d1633 100644 --- a/lib/vector-vrl/tests/Cargo.toml +++ b/lib/vector-vrl/tests/Cargo.toml @@ -7,6 +7,7 @@ publish = false [dependencies] chrono-tz.workspace = true +dnstap-parser = { path = "../../dnstap-parser" } enrichment = { path = "../../enrichment" } vrl.workspace = true vector-vrl-functions = { path = "../../vector-vrl/functions" } diff --git a/lib/vector-vrl/tests/src/main.rs b/lib/vector-vrl/tests/src/main.rs index 962f68b2ef296..4fd751796935f 100644 --- a/lib/vector-vrl/tests/src/main.rs +++ b/lib/vector-vrl/tests/src/main.rs @@ -97,6 +97,7 @@ fn main() { let mut functions = vrl::stdlib::all(); functions.extend(vector_vrl_functions::all()); + functions.extend(dnstap_parser::vrl_functions()); functions.extend(enrichment::vrl_functions()); run_tests( @@ -132,6 +133,7 @@ fn get_tests(cmd: &Cmd) -> Vec { vector_vrl_functions::all() .into_iter() .chain(enrichment::vrl_functions()) + .chain(dnstap_parser::vrl_functions()) .collect(), )) .filter(|test| { diff --git a/lib/vector-vrl/web-playground/Cargo.toml b/lib/vector-vrl/web-playground/Cargo.toml index 13057cff7b374..3f867a3b80363 100644 --- a/lib/vector-vrl/web-playground/Cargo.toml +++ b/lib/vector-vrl/web-playground/Cargo.toml @@ -13,6 +13,7 @@ wasm-bindgen = "0.2" vrl.workspace = true serde.workspace = true serde-wasm-bindgen = "0.6" +dnstap-parser = { path = "../../dnstap-parser" } gloo-utils = { version = "0.2", features = ["serde"] } getrandom = { version = "0.2", features = ["js"] } vector-vrl-functions = { path = "../functions" } diff --git a/lib/vector-vrl/web-playground/src/lib.rs b/lib/vector-vrl/web-playground/src/lib.rs index a434050aefa3c..d6e4f9e441fe5 100644 --- a/lib/vector-vrl/web-playground/src/lib.rs +++ b/lib/vector-vrl/web-playground/src/lib.rs @@ -80,6 +80,7 @@ fn compile(mut input: Input) -> Result { let mut functions = vrl::stdlib::all(); functions.extend(vector_vrl_functions::all()); functions.extend(enrichment::vrl_functions()); + functions.extend(dnstap_parser::vrl_functions()); let event = &mut input.event; let state = TypeState::default(); diff --git a/src/conditions/vrl.rs b/src/conditions/vrl.rs index db8ea2aacdc0f..e8170e3ec51bd 100644 --- a/src/conditions/vrl.rs +++ b/src/conditions/vrl.rs @@ -44,7 +44,11 @@ impl ConditionalConfig for VrlConfig { let functions = vrl::stdlib::all() .into_iter() - .chain(vector_lib::enrichment::vrl_functions()) + .chain(vector_lib::enrichment::vrl_functions()); + #[cfg(feature = "sources-dnstap")] + let functions = functions.chain(dnstap_parser::vrl_functions()); + + let functions = functions .chain(vector_vrl_functions::all()) .collect::>(); diff --git a/src/internal_events/dnstap.rs b/src/internal_events/dnstap.rs index 9bbbbb6f04428..231a6527830d7 100644 --- a/src/internal_events/dnstap.rs +++ b/src/internal_events/dnstap.rs @@ -24,20 +24,3 @@ impl InternalEvent for DnstapParseError { .increment(1); } } - -#[derive(Debug)] -pub(crate) struct DnstapParseWarning { - pub error: E, -} - -impl InternalEvent for DnstapParseWarning { - fn emit(self) { - warn!( - message = "Recoverable error occurred while parsing dnstap data.", - error = %self.error, - stage = error_stage::PROCESSING, - error_type = error_type::PARSER_FAILED, - internal_log_rate_limit = true, - ); - } -} diff --git a/src/sources/dnstap/mod.rs b/src/sources/dnstap/mod.rs index 2ebaa6a922dee..797f9e28e0c86 100644 --- a/src/sources/dnstap/mod.rs +++ b/src/sources/dnstap/mod.rs @@ -2,6 +2,8 @@ use std::path::PathBuf; use base64::prelude::{Engine as _, BASE64_STANDARD}; use dnsmsg_parser::dns_message_parser::DnsParserOptions; +use dnstap_parser::parser::DnstapParser; +use dnstap_parser::schema::DnstapEventSchema; use vector_lib::event::{Event, LogEvent}; use vector_lib::internal_event::{ ByteSize, BytesReceived, InternalEventHandle, Protocol, Registered, @@ -11,25 +13,19 @@ use vector_lib::{configurable::configurable_component, tls::MaybeTlsSettings}; use vrl::path::{OwnedValuePath, PathPrefix}; use vrl::value::{kind::Collection, Kind}; -use self::parser::DnstapParser; - use super::util::framestream::{ build_framestream_tcp_source, build_framestream_unix_source, FrameHandler, }; use crate::internal_events::DnstapParseError; -use crate::sources::dnstap::schema::DNSTAP_VALUE_PATHS; use crate::{ config::{log_schema, DataType, SourceConfig, SourceContext, SourceOutput}, Result, }; +use dnstap_parser::schema::DNSTAP_VALUE_PATHS; -pub mod parser; -pub mod schema; pub mod tcp; #[cfg(unix)] pub mod unix; -use dnsmsg_parser::{dns_message, dns_message_parser}; -pub use schema::DnstapEventSchema; use vector_lib::config::{LegacyKey, LogNamespace}; use vector_lib::lookup::lookup_v2::OptionalValuePath; diff --git a/src/sources/dnstap/unix.rs b/src/sources/dnstap/unix.rs index 80229f715bd5b..de8b96f3c7a36 100644 --- a/src/sources/dnstap/unix.rs +++ b/src/sources/dnstap/unix.rs @@ -11,7 +11,6 @@ use crate::{ sources::util::framestream::UnixFrameHandler, }; -pub use super::schema::DnstapEventSchema; use vector_lib::EstimatedJsonEncodedSizeOf; /// Unix domain socket configuration for the `dnstap` source. diff --git a/src/transforms/remap.rs b/src/transforms/remap.rs index d7522cbe50374..6b44bbbe1f4f4 100644 --- a/src/transforms/remap.rs +++ b/src/transforms/remap.rs @@ -207,6 +207,7 @@ impl RemapConfig { let mut functions = vrl::stdlib::all(); functions.append(&mut vector_lib::enrichment::vrl_functions()); + functions.append(&mut dnstap_parser::vrl_functions()); functions.append(&mut vector_vrl_functions::all()); let state = TypeState { diff --git a/vdev/Cargo.toml b/vdev/Cargo.toml index 98dfa71c154d6..a888f00d3137f 100644 --- a/vdev/Cargo.toml +++ b/vdev/Cargo.toml @@ -8,7 +8,7 @@ readme = "README.md" publish = false [dependencies] -anyhow = "1.0.94" +anyhow.workspace = true chrono.workspace = true clap.workspace = true clap-verbosity-flag = "3.0.1" diff --git a/website/cue/reference/remap/functions/parse_dnstap.cue b/website/cue/reference/remap/functions/parse_dnstap.cue new file mode 100644 index 0000000000000..55fc381adc767 --- /dev/null +++ b/website/cue/reference/remap/functions/parse_dnstap.cue @@ -0,0 +1,142 @@ +package metadata + +remap: functions: parse_dnstap: { + category: "Parse" + description: """ + Parses the `value` as base64 encoded DNSTAP data. + """ + notices: [] + + arguments: [ + { + name: "value" + description: "The base64 encoded representation of the DNSTAP data to parse." + required: true + type: ["string"] + }, + { + name: "lowercase_hostnames" + description: """ + Whether to turn all hostnames found in resulting data lowercase, for consistency. + """ + required: false + default: false + type: ["boolean"] + }, + ] + internal_failure_reasons: [ + "`value` is not a valid base64 encoded string.", + "dnstap parsing failed for `value`", + ] + return: types: ["object"] + + examples: [ + { + title: "Parse dnstap query message" + source: #""" + parse_dnstap!("ChVqYW1lcy1WaXJ0dWFsLU1hY2hpbmUSC0JJTkQgOS4xNi4zGgBy5wEIAxACGAEiEAAAAAAAAAAAAAAAAAAAAAAqECABBQJwlAAAAAAAAAAAADAw8+0CODVA7+zq9wVNMU3WNlI2kwIAAAABAAAAAAABCWZhY2Vib29rMQNjb20AAAEAAQAAKQIAAACAAAAMAAoACOxjCAG9zVgzWgUDY29tAGAAbQAAAAByZLM4AAAAAQAAAAAAAQJoNQdleGFtcGxlA2NvbQAABgABAAApBNABAUAAADkADwA1AAlubyBTRVAgbWF0Y2hpbmcgdGhlIERTIGZvdW5kIGZvciBkbnNzZWMtZmFpbGVkLm9yZy54AQ==") + """# + return: { + "dataType": "Message" + "dataTypeId": 1 + "extraInfo": "" + "messageType": "ResolverQuery" + "messageTypeId": 3 + "queryZone": "com." + "requestData": { + "fullRcode": 0 + "header": { + "aa": false + "ad": false + "anCount": 0 + "arCount": 1 + "cd": false + "id": 37634 + "nsCount": 0 + "opcode": 0 + "qdCount": 1 + "qr": 0 + "ra": false + "rcode": 0 + "rd": false + "tc": false + } + "opt": { + "do": true + "ednsVersion": 0 + "extendedRcode": 0 + "options": [ + { + "optCode": 10 + "optName": "Cookie" + "optValue": "7GMIAb3NWDM=" + }, + ] + "udpPayloadSize": 512 + } + "question": [ + { + "class": "IN" + "domainName": "facebook1.com." + "questionType": "A" + "questionTypeId": 1 + }, + ] + "rcodeName": "NoError" + } + "responseData": { + "fullRcode": 16 + "header": { + "aa": false + "ad": false + "anCount": 0 + "arCount": 1 + "cd": false + "id": 45880 + "nsCount": 0 + "opcode": 0 + "qdCount": 1 + "qr": 0 + "ra": false + "rcode": 16 + "rd": false + "tc": false + } + "opt": { + "do": false + "ednsVersion": 1 + "extendedRcode": 1 + "ede": [ + { + "extraText": "no SEP matching the DS found for dnssec-failed.org." + "infoCode": 9 + "purpose": "DNSKEY Missing" + }, + ] + "udpPayloadSize": 1232 + } + "question": [ + { + "class": "IN" + "domainName": "h5.example.com." + "questionType": "SOA" + "questionTypeId": 6 + }, + ] + "rcodeName": "BADSIG" + } + "responseAddress": "2001:502:7094::30" + "responsePort": 53 + "serverId": "james-Virtual-Machine" + "serverVersion": "BIND 9.16.3" + "socketFamily": "INET6" + "socketProtocol": "UDP" + "sourceAddress": "::" + "sourcePort": 46835 + "time": 1_593_489_007_920_014_129 + "timePrecision": "ns" + "timestamp": "2020-06-30T03:50:07.920014129Z" + } + }, + ] +}