Skip to content

Commit

Permalink
feat(enrichment_tables): add support for custom MMDB types (#20054)
Browse files Browse the repository at this point in the history
* feat(enrichment_tables): add support for custom MMDB types

This adds support for custom MMDB types. The table simply returns whatever
is stored in the database, without further modification. Test data was
generated using the official Go example:
https://github.com/maxmind/mmdbwriter/blob/main/examples/asn-writer/main.go

Fixes: #19995

* Add changelog entry

* Change `hostname` in tests to an actual word to avoid spellcheck

* Update `enrichment_tables` docs

* Update docs

Co-authored-by: Ursula Chen <[email protected]>

* Add separate `mmdb` enrichment table type

* Update docs

* Remove todos

* Update comment on geoip `DatabaseKind`

* Update changelog entry

* Fix mmdb docs

* Add benches for mmdb enrichment_tables

---------

Co-authored-by: Ursula Chen <[email protected]>
  • Loading branch information
esensar and urseberry authored Mar 14, 2024
1 parent ccaa7e3 commit d511e89
Show file tree
Hide file tree
Showing 9 changed files with 436 additions and 15 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,9 @@ protobuf-build = ["dep:tonic-build", "dep:prost-build"]
gcp = ["dep:base64", "dep:goauth", "dep:smpl_jwt"]

# Enrichment Tables
enrichment-tables = ["enrichment-tables-geoip"]
enrichment-tables = ["enrichment-tables-geoip", "enrichment-tables-mmdb"]
enrichment-tables-geoip = ["dep:maxminddb"]
enrichment-tables-mmdb = ["dep:maxminddb"]

# Codecs
codecs-syslog = ["vector-lib/syslog"]
Expand Down Expand Up @@ -942,7 +943,7 @@ remap-benches = ["transforms-remap"]
transform-benches = ["transforms-filter", "transforms-dedupe", "transforms-reduce", "transforms-route"]
codecs-benches = []
loki-benches = ["sinks-loki"]
enrichment-tables-benches = ["enrichment-tables-geoip"]
enrichment-tables-benches = ["enrichment-tables-geoip", "enrichment-tables-mmdb"]

[[bench]]
name = "default"
Expand Down
85 changes: 84 additions & 1 deletion benches/enrichment_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use vector::enrichment_tables::{
file::File,
geoip::{Geoip, GeoipConfig},
mmdb::{Mmdb, MmdbConfig},
Condition, Table,
};
use vector_lib::enrichment::Case;
Expand All @@ -13,7 +14,7 @@ use vrl::value::{ObjectMap, Value};
criterion_group!(
name = benches;
config = Criterion::default().noise_threshold(0.02).sample_size(10);
targets = benchmark_enrichment_tables_file, benchmark_enrichment_tables_geoip
targets = benchmark_enrichment_tables_file, benchmark_enrichment_tables_geoip, benchmark_enrichment_tables_mmdb
);
criterion_main!(benches);

Expand Down Expand Up @@ -323,3 +324,85 @@ fn benchmark_enrichment_tables_geoip(c: &mut Criterion) {
);
});
}

fn benchmark_enrichment_tables_mmdb(c: &mut Criterion) {
let mut group = c.benchmark_group("enrichment_tables_mmdb");
let build = |path: &str| {
Mmdb::new(MmdbConfig {
path: path.to_string(),
})
.unwrap()
};

group.bench_function("enrichment_tables/mmdb_isp", |b| {
let table = build("tests/data/GeoIP2-ISP-Test.mmdb");
let ip = "208.192.1.2";
let mut expected = ObjectMap::new();
expected.insert("autonomous_system_number".into(), 701i64.into());
expected.insert(
"autonomous_system_organization".into(),
"MCI Communications Services, Inc. d/b/a Verizon Business".into(),
);
expected.insert("isp".into(), "Verizon Business".into());
expected.insert("organization".into(), "Verizon Business".into());

b.iter_batched(
|| (&table, ip, &expected),
|(table, ip, expected)| {
assert_eq!(
Ok(expected),
table
.find_table_row(
Case::Insensitive,
&[Condition::Equals {
field: "ip",
value: ip.into(),
}],
None,
None,
)
.as_ref()
)
},
BatchSize::SmallInput,
);
});

group.bench_function("enrichment_tables/mmdb_city", |b| {
let table = build("tests/data/GeoIP2-City-Test.mmdb");
let ip = "67.43.156.9";
let mut expected = ObjectMap::new();
expected.insert(
"location".into(),
ObjectMap::from([
("latitude".into(), Value::from(27.5)),
("longitude".into(), Value::from(90.5)),
])
.into(),
);

b.iter_batched(
|| (&table, ip, &expected),
|(table, ip, expected)| {
assert_eq!(
Ok(expected),
table
.find_table_row(
Case::Insensitive,
&[Condition::Equals {
field: "ip",
value: ip.into(),
}],
Some(&[
"location.latitude".to_string(),
"location.longitude".to_string(),
]),
None,
)
.as_ref()
)
},
BatchSize::SmallInput,
);
});
}
5 changes: 5 additions & 0 deletions changelog.d/20054_custom_mmdb_types.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Added support for custom MMDB enrichment tables. GeoIP enrichment tables will no longer fall back to
the City type for unknown database types and will instead return an error. The new `mmdb` enrichment
table should be used for such types.

authors: esensar
39 changes: 29 additions & 10 deletions src/enrichment_tables/geoip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ use vrl::value::{ObjectMap, Value};
use crate::config::{EnrichmentTableConfig, GenerateConfig};

// MaxMind GeoIP database files have a type field we can use to recognize specific
// products. If we encounter one of these two types, we look for ASN/ISP information;
// otherwise we expect to be working with a City database.
// products. If it is an unknown type, an error will be returned.
#[derive(Copy, Clone, Debug)]
#[allow(missing_docs)]
pub enum DatabaseKind {
Expand All @@ -29,13 +28,16 @@ pub enum DatabaseKind {
City,
}

impl From<&str> for DatabaseKind {
fn from(v: &str) -> Self {
match v {
"GeoLite2-ASN" => Self::Asn,
"GeoIP2-ISP" => Self::Isp,
"GeoIP2-Connection-Type" => Self::ConnectionType,
_ => Self::City,
/// Maps the `database_type` string stored in an MMDB file's metadata to the
/// matching [`DatabaseKind`].
///
/// Only the database types the `geoip` enrichment table knows how to decode are
/// accepted; any other type yields `Err(())` so the caller can report it as
/// unsupported instead of silently treating it as a City database.
impl TryFrom<&str> for DatabaseKind {
    /// The error carries no payload: the caller already holds the offending
    /// type string and builds its own message from it.
    type Error = ();

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value {
            "GeoLite2-ASN" => Ok(Self::Asn),
            "GeoIP2-ISP" => Ok(Self::Isp),
            "GeoIP2-Connection-Type" => Ok(Self::ConnectionType),
            // Both the commercial and the free City editions are documented as
            // supported by `GeoipConfig::path`. Before the catch-all fallback to
            // `City` was removed, GeoLite2-City files were accepted implicitly,
            // so they must be listed explicitly now.
            "GeoIP2-City" | "GeoLite2-City" => Ok(Self::City),
            _ => Err(()),
        }
    }
}
Expand All @@ -48,6 +50,7 @@ pub struct GeoipConfig {
/// (**GeoLite2-City.mmdb**).
///
/// Other databases, such as the country database, are not supported.
/// `mmdb` enrichment table can be used for other databases.
///
/// [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable
/// [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
Expand Down Expand Up @@ -112,7 +115,13 @@ impl Geoip {
/// Creates a new GeoIP struct from the provided config.
pub fn new(config: GeoipConfig) -> crate::Result<Self> {
let dbreader = Arc::new(Reader::open_readfile(config.path.clone())?);
let dbkind = DatabaseKind::from(dbreader.metadata.database_type.as_str());
let dbkind =
DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
format!(
"Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
dbreader.metadata.database_type
)
})?;

// Check if we can read database with dummy Ip.
let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
Expand Down Expand Up @@ -444,6 +453,16 @@ mod tests {
assert!(values.is_none());
}

#[test]
fn custom_mmdb_type_error() {
    // The fixture is an MMDB file with a custom database type; constructing a
    // `Geoip` table from it is expected to fail.
    let config = GeoipConfig {
        path: "tests/data/custom-type.mmdb".to_string(),
        locale: default_locale(),
    };

    assert!(Geoip::new(config).is_err());
}

/// Test helper: forwards `ip` and `database` to `find_select`, passing `None`
/// as the third argument (presumably "no field selection" — confirm against
/// `find_select`), and returns its result unchanged.
fn find(ip: &str, database: &str) -> Option<ObjectMap> {
find_select(ip, database, None)
}
Expand Down
Loading

0 comments on commit d511e89

Please sign in to comment.