From cce784211b1cc88d5d75044359ce7ab12c619470 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 00:33:01 +0100 Subject: [PATCH 01/14] otel support --- Cargo.lock | 132 ++++++++++++++++++++++++++++++++++++----------- Cargo.toml | 11 ++-- src/database.rs | 12 +++++ src/lib.rs | 1 + src/main.rs | 67 ++++++++++++++---------- src/telemetry.rs | 76 +++++++++++++++++++++++++++ 6 files changed, 236 insertions(+), 63 deletions(-) create mode 100644 src/telemetry.rs diff --git a/Cargo.lock b/Cargo.lock index c130005..1f36c56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,7 +527,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap", + "indexmap 2.8.0", "lexical-core", "num", "serde", @@ -989,7 +989,7 @@ dependencies = [ "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tower", + "tower 0.5.2", "tracing", ] @@ -2036,7 +2036,7 @@ dependencies = [ "base64 0.22.1", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.8.0", "libc", "log 0.4.27", "object_store", @@ -2131,7 +2131,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap", + "indexmap 2.8.0", "paste", "recursive", "serde_json", @@ -2146,7 +2146,7 @@ checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", - "indexmap", + "indexmap 2.8.0", "itertools 0.14.0", "paste", ] @@ -2312,7 +2312,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "indexmap", + "indexmap 2.8.0", "itertools 0.14.0", "log 0.4.27", "recursive", @@ -2335,7 +2335,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.8.0", "itertools 0.14.0", "log 0.4.27", "paste", @@ -2397,7 +2397,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap", + "indexmap 2.8.0", "itertools 0.14.0", "log 0.4.27", "parking_lot 0.12.3", @@ -2454,7 +2454,7 @@ dependencies = [ "bigdecimal", "datafusion-common", "datafusion-expr", - "indexmap", + "indexmap 2.8.0", "log 0.4.27", "recursive", "regex 1.11.1", @@ -2485,7 +2485,7 @@ dependencies = [ "fix-hidden-lifetime-bug", "futures", "home", - "indexmap", + "indexmap 2.8.0", "itertools 0.13.0", "object_store", "parquet", @@ -2589,7 +2589,7 @@ dependencies = [ "fix-hidden-lifetime-bug", "futures", "humantime", - "indexmap", + "indexmap 2.8.0", "itertools 0.14.0", "libc", "maplit", @@ -3154,7 +3154,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap", + "indexmap 2.8.0", "slab", "tokio", "tokio-util", @@ -3173,7 +3173,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap", + "indexmap 2.8.0", "slab", "tokio", "tokio-util", @@ -3422,6 +3422,19 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.6.0", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -3632,6 +3645,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.8.0" @@ -4136,6 +4159,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mutually_exclusive_features" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" + [[package]] name = "native-tls" version = "0.2.14" @@ -4360,9 +4389,9 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.28.0" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "236e667b670a5cdf90c258f5a55794ec5ac5027e960c224bff8367a59e1e6426" +checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" dependencies = [ "futures-core", "futures-sink", @@ -4374,9 +4403,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8863faf2910030d139fb48715ad5ff2f35029fc5f244f6d5f689ddcf4d26253" +checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" dependencies = [ "async-trait", "bytes", @@ -4388,11 +4417,10 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bef114c6d41bea83d6dc60eb41720eedd0261a67af57b66dd2b84ac46c01d91" +checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" dependencies = [ - "async-trait", "futures-core", "http 1.3.1", "opentelemetry", @@ -4402,14 +4430,16 @@ dependencies = [ "prost", "reqwest", "thiserror 2.0.12", + "tokio", + "tonic", "tracing", ] [[package]] name = "opentelemetry-proto" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f8870d3024727e99212eb3bb1762ec16e255e3e6f58eeb3dc8db1aa226746d" +checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -4419,20 +4449,21 @@ dependencies = [ [[package]] name = "opentelemetry_sdk" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84dfad6042089c7fc1f6118b7040dc2eb4ab520abbf410b79dc481032af39570" +checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" dependencies = [ - "async-trait", "futures-channel", "futures-executor", "futures-util", "glob", "opentelemetry", "percent-encoding", - "rand 0.8.5", + "rand 0.9.0", "serde_json", "thiserror 2.0.12", + "tokio", + "tokio-stream", "tracing", ] @@ -4593,7 +4624,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 2.8.0", ] [[package]] @@ -5265,7 +5296,7 @@ dependencies = [ "tokio-native-tls", "tokio-rustls 0.26.2", "tokio-util", - "tower", + "tower 0.5.2", "tower-service", "url", "wasm-bindgen", @@ -6296,6 +6327,7 @@ dependencies = [ "tokio-rustls 0.26.2", "tokio-util", "tracing", + "tracing-actix-web", "tracing-opentelemetry", "tracing-subscriber", "url", @@ -6467,7 +6499,7 @@ version = "0.22.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ - "indexmap", + 
"indexmap 2.8.0", "toml_datetime", "winnow", ] @@ -6484,10 +6516,35 @@ dependencies = [ "http 1.3.1", "http-body 1.0.1", "http-body-util", + "hyper 1.6.0", + "hyper-timeout", + "hyper-util", "percent-encoding", "pin-project", "prost", + "tokio", "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -6532,6 +6589,19 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-actix-web" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "332bbdf3bd208d1fe6446f8ffb4e8c2ae66e25da0fb38e0b69545e640ecee6a6" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "pin-project", + "tracing", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.28" @@ -6576,9 +6646,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721f2d2569dce9f3dfbbddee5906941e953bfcdf736a62da3377f5751650cc36" +checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" dependencies = [ "js-sys", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index e58754d..d4fc5dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,13 +45,11 @@ rustls = "0.23.23" actix-service = "2.0.2" lazy_static = "1.5.0" bcrypt = "0.17.0" -opentelemetry = "0.28.0" -opentelemetry-otlp = "0.28.0" -tracing-opentelemetry = "0.29.0" +opentelemetry_sdk = { version = "0.29.0", features = ["experimental_async_runtime", "rt-tokio"] } +opentelemetry-otlp = { version = "0.29.0", features = ["grpc-tonic"] } +opentelemetry = "0.29.0" +tracing-opentelemetry = "0.30.0" bincode = "1.3.3" -opentelemetry_sdk = { version = "0.28.0", features = [ - "experimental_async_runtime", -] } actix-files = "0.6.6" datafusion-uwheel = { git = "https://github.com/apitoolkit/datafusion-uwheel.git", branch = "datafusion-46" } sqllogictest = { git = "https://github.com/risinglightdb/sqllogictest-rs.git" } @@ -62,6 +60,7 @@ aws-types = "1.3.6" aws-sdk-s3 = "1.3.0" url = "2.5.4" datafusion-common = "46.0.0" +tracing-actix-web = "0.7.16" [dev-dependencies] serial_test = "3.2.0" diff --git a/src/database.rs b/src/database.rs index d8812a7..9e230fd 100644 --- a/src/database.rs +++ b/src/database.rs @@ -46,6 +46,7 @@ impl Clone for Database { } impl Database { + #[tracing::instrument(name = "db.new", skip())] pub async fn new() -> Result { let bucket = env::var("AWS_S3_BUCKET").expect("AWS_S3_BUCKET environment variable not set"); let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); @@ -71,6 +72,7 @@ impl Database { } #[cfg(test)] + #[tracing::instrument(name = "db.new_for_test", skip())] pub async fn new_for_test() -> Result { // For tests, we directly configure all AWS env vars info!("Starting Database in test mode"); @@ -111,6 +113,7 @@ impl Database { } /// Create and configure a SessionContext with DataFusion settings + #[tracing::instrument(name = "db.create_session_context", skip(self))] pub fn create_session_context(&self) -> SessionContext { use 
datafusion::config::ConfigOptions; use datafusion::execution::context::SessionContext; @@ -121,6 +124,7 @@ impl Database { } /// Setup the session context with tables and register DataFusion tables + #[tracing::instrument(name = "db.setup_session_context", skip(self, ctx))] pub fn setup_session_context(&self, ctx: &SessionContext) -> DFResult<()> { use crate::persistent_queue::OtelLogsAndSpans; @@ -137,6 +141,7 @@ impl Database { } /// Register PostgreSQL settings table for compatibility + #[tracing::instrument(name = "db.register_pg_settings_table", skip(self, ctx))] pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> datafusion::error::Result<()> { use datafusion::arrow::array::StringArray; use datafusion::arrow::datatypes::{DataType, Field, Schema}; @@ -157,6 +162,7 @@ impl Database { } /// Register set_config UDF for PostgreSQL compatibility + #[tracing::instrument(name = "db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { use datafusion::arrow::array::{StringArray, StringBuilder}; use datafusion::arrow::datatypes::DataType; @@ -191,6 +197,7 @@ impl Database { } /// Start a PGWire server with the given session context + #[tracing::instrument(name = "db.start_pgwire_server", skip(self, session_context, shutdown_token), fields(port))] pub async fn start_pgwire_server( &self, session_context: SessionContext, port: u16, shutdown_token: CancellationToken, ) -> Result<tokio::task::JoinHandle<()>> { @@ -242,6 +249,7 @@ impl Database { Ok(pg_server) } + #[tracing::instrument(name = "db.resolve_table", skip(self), fields(project_id))] pub async fn resolve_table(&self, project_id: &str) -> DFResult<Arc<RwLock<DeltaTable>>> { let project_configs = self.project_configs.read().await; @@ -265,6 +273,7 @@ impl Database { ))) } + #[tracing::instrument(name = "db.insert_records_batch", skip(self, _table, batch), fields(batch_size = batch.len()))] pub async fn insert_records_batch(&self, _table: &str, batch: Vec<RecordBatch>) -> Result<()> { // Get the table reference for the default project let (_conn_str, _options, table_ref) = { @@ -282,6 +291,8 @@ impl Database { } #[cfg(test)] + + #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec<OtelLogsAndSpans>) -> Result<()> { // TODO: insert records doesn't need to accept a project_id as they can be read from the // record. 
@@ -298,6 +309,7 @@ impl Database { self.insert_records_batch("default", vec![batch]).await } + #[tracing::instrument(name = "db.register_project", skip(self, conn_str, access_key, secret_key, endpoint), fields(project_id))] pub async fn register_project( &self, project_id: &str, conn_str: &str, access_key: Option<&str>, secret_key: Option<&str>, endpoint: Option<&str>, ) -> Result<()> { diff --git a/src/lib.rs b/src/lib.rs index 149debd..8d4122b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ // lib.rs - Export modules for use in tests pub mod database; pub mod persistent_queue; +pub mod telemetry; diff --git a/src/main.rs b/src/main.rs index ef9247d..deecfd9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ -// main.rs +// src/main.rs +mod telemetry; mod database; mod persistent_queue; use actix_web::{App, HttpResponse, HttpServer, Responder, middleware::Logger, post, web}; @@ -10,7 +11,7 @@ use std::{env, sync::Arc}; use tokio::time::{Duration, sleep}; use tokio_util::sync::CancellationToken; use tracing::{error, info}; -use tracing_subscriber::EnvFilter; +use tracing_actix_web::TracingLogger; #[derive(Clone)] struct AppInfo {} @@ -24,8 +25,16 @@ struct RegisterProjectRequest { endpoint: Option<String>, } +#[tracing::instrument( + name = "HTTP /register_project", + skip(req, db), + fields(project_id = %req.project_id) +)] #[post("/register_project")] -async fn register_project(req: web::Json<RegisterProjectRequest>, db: web::Data<Arc<Database>>) -> impl Responder { +async fn register_project( + req: web::Json<RegisterProjectRequest>, + db: web::Data<Arc<Database>>, +) -> impl Responder { match db .register_project( &req.project_id, @@ -36,57 +45,64 @@ async fn register_project(req: web::Json<RegisterProjectRequest>, db: web::Data< ) .await { - Ok(()) => HttpResponse::Ok().json(serde_json::json!({ - "message": format!("Project '{}' registered successfully", req.project_id) - })), - Err(e) => HttpResponse::InternalServerError().json(serde_json::json!({ - "error": format!("Failed to register project: {:?}", e) - })), + Ok(()) => { + info!("Project registered successfully"); + HttpResponse::Ok().json(serde_json::json!({ + "message": format!("Project '{}' registered successfully", req.project_id) + })) + } + Err(e) => { + error!("Failed to register project: {:?}", e); + HttpResponse::InternalServerError().json(serde_json::json!({ + "error": format!("Failed to register project: {:?}", e) + })) + } } } #[tokio::main] async fn main() -> anyhow::Result<()> { - // Initialize environment and logging dotenv().ok(); - tracing_subscriber::fmt() - .with_env_filter(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("timefusion=debug,pgwire=trace,datafusion=debug"))) - .init(); + + // Initialize tracing & metrics + telemetry::init_telemetry(); info!("Starting TimeFusion application"); - // Initialize database let db = Database::new().await?; info!("Database initialized successfully"); - // Create and setup session context let session_context = db.create_session_context(); db.setup_session_context(&session_context)?; info!("Session context setup complete"); - // Wrap database in Arc for sharing let db = Arc::new(db); let app_info = web::Data::new(AppInfo {}); - // Setup cancellation token for clean shutdown let shutdown_token = CancellationToken::new(); let http_shutdown = shutdown_token.clone(); - // Start PGWire server - let pg_port = env::var("PGWIRE_PORT").unwrap_or_else(|_| "5432".to_string()).parse::<u16>().unwrap_or(5432); - let pg_server = db.start_pgwire_server(session_context, pg_port, shutdown_token.clone()).await?; + let pg_port = env::var("PGWIRE_PORT") 
.unwrap_or_else(|_| "5432".to_string()) + .parse::() + .unwrap_or(5432); + let pg_server = db + .start_pgwire_server(session_context.clone(), pg_port, shutdown_token.clone()) + .await?; - // Verify server started correctly tokio::time::sleep(Duration::from_secs(1)).await; if pg_server.is_finished() { error!("PGWire server failed to start, aborting..."); return Err(anyhow::anyhow!("PGWire server failed to start")); } - // Start HTTP server - let http_addr = format!("0.0.0.0:{}", env::var("PORT").unwrap_or_else(|_| "80".to_string())); + let http_addr = format!( + "0.0.0.0:{}", + env::var("PORT").unwrap_or_else(|_| "80".to_string()) + ); let http_server = HttpServer::new(move || { App::new() + .wrap(TracingLogger::default()) .wrap(Logger::default()) .app_data(web::Data::new(db.clone())) .app_data(app_info.clone()) @@ -104,7 +120,7 @@ async fn main() -> anyhow::Result<()> { } }; - let http_server_handle = server.handle(); + let http_handle = server.handle(); let http_task = tokio::spawn(async move { tokio::select! { _ = http_shutdown.cancelled() => info!("HTTP server shutting down."), @@ -115,14 +131,13 @@ async fn main() -> anyhow::Result<()> { } }); - // Wait for shutdown signal tokio::select! { _ = pg_server.map_err(|e| error!("PGWire server task failed: {:?}", e)) => {}, _ = http_task.map_err(|e| error!("HTTP server task failed: {:?}", e)) => {}, _ = tokio::signal::ctrl_c() => { info!("Received Ctrl+C, initiating shutdown."); shutdown_token.cancel(); - http_server_handle.stop(true).await; + http_handle.stop(true).await; sleep(Duration::from_secs(1)).await; } } diff --git a/src/telemetry.rs b/src/telemetry.rs new file mode 100644 index 0000000..756496d --- /dev/null +++ b/src/telemetry.rs @@ -0,0 +1,76 @@ +// src/telemetry.rs + +use opentelemetry_sdk::{ + trace::SdkTracerProvider, + Resource, +}; +use opentelemetry::trace::TracerProvider; // for `.tracer()` +use opentelemetry_otlp::{Protocol, WithExportConfig}; +use std::{env, time::Duration}; +use opentelemetry::KeyValue; +use tracing_subscriber::{fmt, layer::SubscriberExt, Registry}; +use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; + +pub fn init_telemetry() { + // Read configuration from environment variables. + let otlp_trace_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4317".into()); + let otlp_metrics_endpoint = env::var("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") + .unwrap_or_else(|_| otlp_trace_endpoint.clone()); + let service_name = env::var("OTEL_SERVICE_NAME") + .unwrap_or_else(|_| "timefusion".into()); + let resource_attrs = env::var("OTEL_RESOURCE_ATTRIBUTES").unwrap_or_default(); + + // Build resource using the public builder API. 
+ let resource = Resource::builder() + .with_attributes(vec![ + KeyValue::new("service.name", service_name), + KeyValue::new("at-project-key", resource_attrs), + ]) + .build(); + + // --- Setup OTLP Tracing --- + let trace_exporter = opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_endpoint(otlp_trace_endpoint) + .with_protocol(Protocol::Grpc) + .build() + .expect("Failed to create OTLP trace exporter"); + + let tracer_provider = SdkTracerProvider::builder() + .with_batch_exporter(trace_exporter) + .with_resource(resource.clone()) + .build(); + + let sdk_tracer = tracer_provider.tracer("timefusion_tracer"); + + opentelemetry::global::set_tracer_provider(tracer_provider); + + let otel_layer = tracing_opentelemetry::layer().with_tracer(sdk_tracer); + let fmt_layer = fmt::layer(); + let subscriber = Registry::default() + .with(tracing_subscriber::EnvFilter::from_default_env()) + .with(fmt_layer) + .with(otel_layer); + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set global tracing subscriber"); + + // --- Setup OTLP Metrics --- + let metric_exporter = opentelemetry_otlp::MetricExporter::builder() + .with_tonic() + .with_endpoint(otlp_metrics_endpoint) + .with_protocol(Protocol::Grpc) + .build() + .expect("Failed to create OTLP metric exporter"); + + let reader = PeriodicReader::builder(metric_exporter) + .with_interval(Duration::from_secs(60)) + .build(); + + let meter_provider = SdkMeterProvider::builder() + .with_reader(reader) + .with_resource(resource) + .build(); + + opentelemetry::global::set_meter_provider(meter_provider); +} From 1a3b7c3eb22f7898619ecfa08bf8e3b14a114d24 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 00:41:36 +0100 Subject: [PATCH 02/14] otel support: updated README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 4043639..3de95b9 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,9 @@ Timefusion can be configured using the following environment variables: | `AWS_S3_BUCKET` | AWS S3 bucket name | Required | | `AWS_S3_ENDPOINT` | AWS S3 endpoint URL | `https://s3.amazonaws.com` | | `QUEUE_DB_PATH` | Path to the persistent queue database | `/app/queue_db` | +| `OTEL_SERVICE_NAME` | The service name reported to OpenTelemetry | `timefusion` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP endpoint for sending traces & metrics | `hhttp://otelcol.apitoolkit.io:4317/` | +| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (Optional) Separate OTLP endpoint for metrics | Same as `OTEL_EXPORTER_OTLP_ENDPOINT` | +| `OTEL_RESOURCE_ATTRIBUTES` | Resource attributes for OTEL (set to your API Toolkit key) | `${APITOOLKIT_API_KEY}` | For local development, you can set `QUEUE_DB_PATH` to a location in your development environment. 
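[Editor's note: as a quick illustration of the endpoint fallback chain documented in the table above and implemented in `src/telemetry.rs`, here is a minimal standalone sketch. The `resolve_endpoints` helper is hypothetical (not part of the patches), and the default shown is the corrected value introduced by the next patch in this series.]

```rust
use std::env;

// Hypothetical helper mirroring the fallback chain in `init_telemetry`:
// OTEL_EXPORTER_OTLP_METRICS_ENDPOINT falls back to OTEL_EXPORTER_OTLP_ENDPOINT,
// which in turn falls back to the hardcoded collector default.
fn resolve_endpoints() -> (String, String) {
    let traces = env::var("OTEL_EXPORTER_OTLP_ENDPOINT")
        .unwrap_or_else(|_| "http://otelcol.apitoolkit.io:4317".to_string());
    // Metrics reuse the trace endpoint unless overridden explicitly.
    let metrics = env::var("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT")
        .unwrap_or_else(|_| traces.clone());
    (traces, metrics)
}

fn main() {
    let (traces, metrics) = resolve_endpoints();
    println!("traces  -> {traces}");
    println!("metrics -> {metrics}");
}
```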
From 095ca723ba1e3e24013c9f2c307a0cdf5fd1e572 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 08:15:06 +0100 Subject: [PATCH 03/14] fix: default telemetry endpoint --- README.md | 2 +- src/telemetry.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3de95b9..640059e 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Timefusion can be configured using the following environment variables: | `AWS_S3_ENDPOINT` | AWS S3 endpoint URL | `https://s3.amazonaws.com` | | `QUEUE_DB_PATH` | Path to the persistent queue database | `/app/queue_db` | | `OTEL_SERVICE_NAME` | The service name reported to OpenTelemetry | `timefusion` | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP endpoint for sending traces & metrics | `hhttp://otelcol.apitoolkit.io:4317/` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP endpoint for sending traces & metrics | `http://otelcol.apitoolkit.io:4317` | | `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (Optional) Separate OTLP endpoint for metrics | Same as `OTEL_EXPORTER_OTLP_ENDPOINT` | | `OTEL_RESOURCE_ATTRIBUTES` | Resource attributes for OTEL (set to your API Toolkit key) | `${APITOOLKIT_API_KEY}` | diff --git a/src/telemetry.rs b/src/telemetry.rs index 756496d..902da0a 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -4,7 +4,7 @@ use opentelemetry_sdk::{ trace::SdkTracerProvider, Resource, }; -use opentelemetry::trace::TracerProvider; // for `.tracer()` +use opentelemetry::trace::TracerProvider; use opentelemetry_otlp::{Protocol, WithExportConfig}; use std::{env, time::Duration}; use opentelemetry::KeyValue; @@ -14,7 +14,7 @@ use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; pub fn init_telemetry() { // Read configuration from environment variables. 
let otlp_trace_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT") - .unwrap_or_else(|_| "http://localhost:4317".into()); + .unwrap_or_else(|_| "http://otelcol.apitoolkit.io:4317".into()); let otlp_metrics_endpoint = env::var("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") .unwrap_or_else(|_| otlp_trace_endpoint.clone()); let service_name = env::var("OTEL_SERVICE_NAME") From 8f2247ce9a5e586a97a2189b4a75cb3ae8e03565 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 11:20:38 +0100 Subject: [PATCH 04/14] feat: Enhance error handling and graceful shutdown in TimeFusion - Introduced centralized error handling with `TimeFusionError` enum in new `error.rs` module - Updated `main.rs` and `database.rs` to use custom `Result` type for consistent error propagation - Added `flush_pending_writes` to `Database` for ensuring data consistency during shutdown - Improved graceful shutdown in `main.rs` with database write monitoring and proper channel handling - Removed unused `AppInfo` struct and `shutdown_tx` from `AppState` to eliminate dead code warnings - Fixed move errors and unused variable warnings related to `shutdown_tx` and `shutdown_rx` - Updated `lib.rs` to export `error` module --- Cargo.lock | 1 + Cargo.toml | 1 + src/database.rs | 81 ++++++++++++++++++++++++++++++------------------ src/error.rs | 40 ++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 72 ++++++++++++++++++++++++------------------ src/telemetry.rs | 2 +- 7 files changed, 137 insertions(+), 61 deletions(-) create mode 100644 src/error.rs diff --git a/Cargo.lock b/Cargo.lock index 1f36c56..b105e02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6322,6 +6322,7 @@ dependencies = [ "sqlparser 0.55.0", "task", "tempfile", + "thiserror 2.0.12", "tokio", "tokio-postgres", "tokio-rustls 0.26.2", diff --git a/Cargo.toml b/Cargo.toml index d4fc5dc..b4f6581 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,7 @@ aws-sdk-s3 = "1.3.0" url = "2.5.4" datafusion-common = "46.0.0" tracing-actix-web = "0.7.16" +thiserror = "2.0.12" [dev-dependencies] serial_test = "3.2.0" diff --git a/src/database.rs b/src/database.rs index 9e230fd..dc84d53 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,5 +1,5 @@ use crate::persistent_queue::OtelLogsAndSpans; -use anyhow::Result; +use crate::error::{Result, TimeFusionError}; use arrow_schema::SchemaRef; use async_trait::async_trait; @@ -46,7 +46,8 @@ impl Clone for Database { impl Database { #[tracing::instrument(name = "db.new", skip())] pub async fn new() -> Result<Self> { - let bucket = env::var("AWS_S3_BUCKET").expect("AWS_S3_BUCKET environment variable not set"); 
+ let bucket = env::var("AWS_S3_BUCKET") + .map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); // Generate a unique prefix for this run's data @@ -92,7 +95,8 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", bucket, prefix, aws_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&aws_endpoint).expect("AWS endpoint must be a valid URL"); + let aws_url = Url::parse(&aws_endpoint) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -103,8 +107,10 @@ impl Database { }; // For tests, pass credentials explicitly - let access_key = env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID not set"); - let secret_key = env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY not set"); + let access_key = env::var("AWS_ACCESS_KEY_ID") + .map_err(|_| TimeFusionError::Config("AWS_ACCESS_KEY_ID not set".to_string()))?; + let secret_key = env::var("AWS_SECRET_ACCESS_KEY") + .map_err(|_| TimeFusionError::Config("AWS_SECRET_ACCESS_KEY not set".to_string()))?; info!("Registering project with explicit credentials"); db.register_project("default", &storage_uri, Some(&access_key), Some(&secret_key), Some(&aws_endpoint)).await?; @@ -206,7 +212,8 @@ impl Database { let pg_service = Arc::new(DfSessionService::new(session_context)); let handler_factory = Arc::new(HandlerFactory(pg_service.clone())); - let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await?; + let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await + .map_err(TimeFusionError::Io)?; info!("PGWire server running on 0.0.0.0:{}", port); let pgwire_shutdown = shutdown_token.clone(); @@ -278,20 +285,22 @@ impl Database { // Get the table reference for the default project let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; - configs.get("default").ok_or_else(|| anyhow::anyhow!("Project ID '{}' not found", "default"))?.clone() + configs.get("default") + .ok_or_else(|| TimeFusionError::Generic(anyhow::anyhow!("Project ID 'default' not found")))? 
+ .clone() }; let mut table = table_ref.write().await; let ops = DeltaOps(table.clone()); let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); - *table = write_op.await?; + *table = write_op.await + .map_err(TimeFusionError::Database)?; Ok(()) } #[cfg(test)] - #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec<OtelLogsAndSpans>) -> Result<()> { // TODO: insert records doesn't need to accept a project_id as they can be read from the @@ -302,8 +311,10 @@ impl Database { use serde_arrow::schema::SchemaLike; // Convert OtelLogsAndSpans records to Arrow RecordBatch format - let fields = Vec::<FieldRef>::from_type::<OtelLogsAndSpans>(serde_arrow::schema::TracingOptions::default())?; - let batch = serde_arrow::to_record_batch(&fields, &records)?; + let fields = Vec::<FieldRef>::from_type::<OtelLogsAndSpans>(serde_arrow::schema::TracingOptions::default()) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields: {}", e)))?; + let batch = serde_arrow::to_record_batch(&fields, &records) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert to record batch: {}", e)))?; // Call insert_records_batch with the converted batch to reuse common insertion logic self.insert_records_batch("default", vec![batch]).await @@ -329,20 +340,27 @@ impl Database { storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); - let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { + let table = match DeltaTableBuilder::from_uri(conn_str) + .with_storage_options(storage_options.0.clone()) + .with_allow_http(true) + .load() + .await + { Ok(table) => table, Err(err) => { log::warn!("table doesn't exist. creating new table. err: {:?}", err); // Create the table with project_id partitioning only for now // Timestamp partitioning is likely causing issues with nanosecond precision - let delta_ops = DeltaOps::try_from_uri(&conn_str).await?; + let delta_ops = DeltaOps::try_from_uri(&conn_str).await + .map_err(TimeFusionError::Database)?; delta_ops .create() .with_columns(OtelLogsAndSpans::columns().unwrap_or_default()) .with_partition_columns(OtelLogsAndSpans::partitions()) .with_storage_options(storage_options.0.clone()) - .await? + .await + .map_err(TimeFusionError::Database)? } }; @@ -350,6 +368,22 @@ impl Database { configs.insert(project_id.to_string(), (conn_str.to_string(), storage_options, Arc::new(RwLock::new(table)))); Ok(()) } + + /// Flushes any pending writes to Delta Lake for all projects + #[tracing::instrument(name = "db.flush_pending_writes", skip(self))] + pub async fn flush_pending_writes(&self) -> Result<()> { + let configs = self.project_configs.read().await; + for (project_id, (_, _, table)) in configs.iter() { + let mut table = table.write().await; + // Delta Lake doesn't have an explicit flush method, but we can ensure + // the table is up-to-date by loading its latest state + *table = deltalake::open_table(&table.table_uri()) + .await + .map_err(TimeFusionError::Database)?; + debug!("Flushed pending writes for project: {}", project_id); + } + Ok(()) + } } #[derive(Debug, Clone)] pub struct ProjectRoutingTable { default_project: String, database: Arc<Database>, schema: SchemaRef, } impl ProjectRoutingTable { @@ -410,16 +444,6 @@ impl ProjectRoutingTable { } None } - // // Recursive: AND, OR expressions - // Expr::BooleanQuery { operands, ..
} => { - // for operand in operands { - // if let Some(project_id) = self.extract_project_id(operand) { - // return Some(project_id); - // } - // } - // None - // } - // Look inside NOT expressions Expr::Not(inner) => self.extract_project_id(inner), _ => None, } } @@ -432,10 +456,7 @@ impl DisplayAs for ProjectRoutingTable { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!(f, "ProjectRoutingTable ") - } // DisplayFormatType::TreeRender => { - // // TODO: collect info - // write!(f, "") - // } + } } } } diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..eb811a9 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,40 @@ +// error.rs +use thiserror::Error; +use datafusion::error::DataFusionError; +use actix_web::error::Error as ActixError; +use deltalake::DeltaTableError; +use std::io; + +#[derive(Error, Debug)] +pub enum TimeFusionError { + #[error("Database error: {0}")] + Database(#[from] DeltaTableError), + + #[error("DataFusion error: {0}")] + DataFusion(#[from] DataFusionError), + + #[error("HTTP error: {0}")] + Http(#[from] ActixError), + + #[error("IO error: {0}")] + Io(#[from] io::Error), + + #[error("Configuration error: {0}")] + Config(String), + + #[error("Generic error: {0}")] + Generic(#[from] anyhow::Error), +} + +impl actix_web::ResponseError for TimeFusionError { + fn error_response(&self) -> actix_web::HttpResponse { + match self { + TimeFusionError::Http(err) => err.error_response(), + _ => actix_web::HttpResponse::InternalServerError().json(serde_json::json!({ + "error": self.to_string() + })), + } + } +} + +pub type Result<T> = std::result::Result<T, TimeFusionError>; \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 8d4122b..11583f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,3 +2,4 @@ pub mod database; pub mod persistent_queue; pub mod telemetry; +pub mod error; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index deecfd9..9f3a246 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ // src/main.rs -mod telemetry; +mod telemetry; mod database; mod persistent_queue; +mod error; use actix_web::{App, HttpResponse, HttpServer, Responder, middleware::Logger, post, web}; use database::Database; use dotenv::dotenv; @@ -12,9 +13,14 @@ use std::{env, sync::Arc}; use tokio::time::{Duration, sleep}; use tokio_util::sync::CancellationToken; use tracing::{error, info}; use tracing_actix_web::TracingLogger; +use tokio::sync::mpsc; +use crate::error::{Result, TimeFusionError}; -#[derive(Clone)] -struct AppInfo {} +struct AppState { + db: Arc<Database>, +} + +struct ShutdownSignal; #[derive(Deserialize)] struct RegisterProjectRequest { @@ -27,15 +33,15 @@ struct RegisterProjectRequest { #[tracing::instrument( name = "HTTP /register_project", - skip(req, db), + skip(req, app_state), fields(project_id = %req.project_id) )] #[post("/register_project")] async fn register_project( req: web::Json<RegisterProjectRequest>, - db: web::Data<Arc<Database>>, -) -> impl Responder { - match db + app_state: web::Data<AppState>, +) -> Result<HttpResponse> { + app_state.db .register_project( &req.project_id, &req.bucket, @@ -43,25 +49,16 @@ async fn register_project( Some(&req.secret_key), req.endpoint.as_deref(), ) - .await - { - Ok(()) => { - info!("Project registered successfully"); - HttpResponse::Ok().json(serde_json::json!({ - "message": format!("Project '{}' registered successfully", req.project_id) - })) - } - Err(e) => { - error!("Failed to register project: {:?}", e); - HttpResponse::InternalServerError().json(serde_json::json!({ - "error": format!("Failed to register project: {:?}", e) - })) - } - } + .await?; + 
info!("Project registered successfully"); + Ok(HttpResponse::Ok().json(serde_json::json!({ + "message": format!("Project '{}' registered successfully", req.project_id) + }))) } #[tokio::main] -async fn main() -> anyhow::Result<()> { +async fn main() -> Result<()> { dotenv().ok(); // Initialize tracing & metrics @@ -77,11 +74,24 @@ async fn main() -> anyhow::Result<()> { info!("Session context setup complete"); let db = Arc::new(db); - let app_info = web::Data::new(AppInfo {}); - + let (shutdown_tx, _shutdown_rx) = mpsc::channel::(1); let shutdown_token = CancellationToken::new(); let http_shutdown = shutdown_token.clone(); + // Spawn database write monitor + let db_clone = db.clone(); + let shutdown_monitor = shutdown_token.clone(); + tokio::spawn(async move { + tokio::select! { + _ = shutdown_monitor.cancelled() => { + db_clone.flush_pending_writes().await.unwrap_or_else(|e| { + error!("Failed to flush pending writes: {:?}", e); + }); + info!("Database writes completed during shutdown"); + } + } + }); + let pg_port = env::var("PGWIRE_PORT") .unwrap_or_else(|_| "5432".to_string()) .parse::() @@ -93,7 +103,7 @@ async fn main() -> anyhow::Result<()> { tokio::time::sleep(Duration::from_secs(1)).await; if pg_server.is_finished() { error!("PGWire server failed to start, aborting..."); - return Err(anyhow::anyhow!("PGWire server failed to start")); + return Err(TimeFusionError::Generic(anyhow::anyhow!("PGWire server failed to start"))); } let http_addr = format!( @@ -104,8 +114,9 @@ async fn main() -> anyhow::Result<()> { App::new() .wrap(TracingLogger::default()) .wrap(Logger::default()) - .app_data(web::Data::new(db.clone())) - .app_data(app_info.clone()) + .app_data(web::Data::new(AppState { + db: db.clone(), + })) .service(register_project) }); @@ -116,7 +127,7 @@ async fn main() -> anyhow::Result<()> { } Err(e) => { error!("Failed to bind HTTP server to {}: {:?}", http_addr, e); - return Err(anyhow::anyhow!("Failed to bind HTTP server: {:?}", e)); + return Err(TimeFusionError::Io(e)); } }; @@ -137,6 +148,7 @@ async fn main() -> anyhow::Result<()> { _ = tokio::signal::ctrl_c() => { info!("Received Ctrl+C, initiating shutdown."); shutdown_token.cancel(); + let _ = shutdown_tx.send(ShutdownSignal).await; http_handle.stop(true).await; sleep(Duration::from_secs(1)).await; } @@ -144,4 +156,4 @@ async fn main() -> anyhow::Result<()> { info!("Shutdown complete."); Ok(()) -} +} \ No newline at end of file diff --git a/src/telemetry.rs b/src/telemetry.rs index 902da0a..028c8d2 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,4 +1,4 @@ -// src/telemetry.rs +// telemetry.rs use opentelemetry_sdk::{ trace::SdkTracerProvider, From 840fcbdc7046ffe7f48a272db549d21234a97e34 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 11:34:11 +0100 Subject: [PATCH 05/14] refactor: Update OtelLogsAndSpans for consistent error handling and type safety - Replaced anyhow::Result with custom TimeFusionError in columns() - Improved schema validation with runtime checks instead of asserts - Updated imports to include error module and removed unused serde_json::json --- benches/benchmarks.rs | 176 +++++++++++++++++++------------------- src/database.rs | 100 ++++++++++------------ src/error.rs | 9 +- src/lib.rs | 2 +- src/main.rs | 48 +++++------ src/persistent_queue.rs | 162 ++++++++++++++++++++--------------- src/telemetry.rs | 45 ++++------ tests/integration_test.rs | 9 +- tests/sqllogictest.rs | 11 +-- 9 files changed, 277 insertions(+), 285 deletions(-) diff --git a/benches/benchmarks.rs 
b/benches/benchmarks.rs index a03c664..17441e2 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -34,51 +34,51 @@ fn bench_batch_ingestion(c: &mut Criterion) { let mut records = Vec::with_capacity(batch_size); for _ in 0..batch_size { records.push(IngestRecord { - table_name: "bench_table".to_string(), - project_id: "bench_project".to_string(), - id: Uuid::new_v4().to_string(), - version: 1, - event_type: "bench_event".to_string(), - timestamp: "2025-03-11T12:00:00Z".to_string(), - trace_id: "trace".to_string(), - span_id: "span".to_string(), - parent_span_id: None, - trace_state: None, - start_time: "2025-03-11T12:00:00Z".to_string(), - end_time: Some("2025-03-11T12:00:01Z".to_string()), - duration_ns: 1_000_000_000, - span_name: "span_name".to_string(), - span_kind: "client".to_string(), - span_type: "bench".to_string(), - status: None, - status_code: 0, - status_message: "OK".to_string(), - severity_text: None, - severity_number: 0, - host: "localhost".to_string(), - url_path: "/".to_string(), - raw_url: "/".to_string(), - method: "GET".to_string(), - referer: "".to_string(), - path_params: None, - query_params: None, - request_headers: None, - response_headers: None, - request_body: None, - response_body: None, - endpoint_hash: "hash".to_string(), - shape_hash: "shape".to_string(), - format_hashes: vec!["fmt".to_string()], - field_hashes: vec!["field".to_string()], - sdk_type: "rust".to_string(), - service_version: None, - attributes: None, - events: None, - links: None, - resource: None, + table_name: "bench_table".to_string(), + project_id: "bench_project".to_string(), + id: Uuid::new_v4().to_string(), + version: 1, + event_type: "bench_event".to_string(), + timestamp: "2025-03-11T12:00:00Z".to_string(), + trace_id: "trace".to_string(), + span_id: "span".to_string(), + parent_span_id: None, + trace_state: None, + start_time: "2025-03-11T12:00:00Z".to_string(), + end_time: Some("2025-03-11T12:00:01Z".to_string()), + duration_ns: 1_000_000_000, + span_name: "span_name".to_string(), + span_kind: "client".to_string(), + span_type: "bench".to_string(), + status: None, + status_code: 0, + status_message: "OK".to_string(), + severity_text: None, + severity_number: 0, + host: "localhost".to_string(), + url_path: "/".to_string(), + raw_url: "/".to_string(), + method: "GET".to_string(), + referer: "".to_string(), + path_params: None, + query_params: None, + request_headers: None, + response_headers: None, + request_body: None, + response_body: None, + endpoint_hash: "hash".to_string(), + shape_hash: "shape".to_string(), + format_hashes: vec!["fmt".to_string()], + field_hashes: vec!["field".to_string()], + sdk_type: "rust".to_string(), + service_version: None, + attributes: None, + events: None, + links: None, + resource: None, instrumentation_scope: None, - errors: None, - tags: vec!["tag".to_string()], + errors: None, + tags: vec!["tag".to_string()], }); } @@ -110,51 +110,51 @@ fn bench_insertion_range(c: &mut Criterion) { let mut records = Vec::with_capacity(size); for _ in 0..size { records.push(IngestRecord { - table_name: "bench_table".to_string(), - project_id: "bench_project".to_string(), - id: Uuid::new_v4().to_string(), - version: 1, - event_type: "bench_event".to_string(), - timestamp: "2025-03-11T12:00:00Z".to_string(), - trace_id: "trace".to_string(), - span_id: "span".to_string(), - parent_span_id: None, - trace_state: None, - start_time: "2025-03-11T12:00:00Z".to_string(), - end_time: Some("2025-03-11T12:00:01Z".to_string()), - duration_ns: 1_000_000_000, - 
span_name: "span_name".to_string(), - span_kind: "client".to_string(), - span_type: "bench".to_string(), - status: None, - status_code: 0, - status_message: "OK".to_string(), - severity_text: None, - severity_number: 0, - host: "localhost".to_string(), - url_path: "/".to_string(), - raw_url: "/".to_string(), - method: "GET".to_string(), - referer: "".to_string(), - path_params: None, - query_params: None, - request_headers: None, - response_headers: None, - request_body: None, - response_body: None, - endpoint_hash: "hash".to_string(), - shape_hash: "shape".to_string(), - format_hashes: vec!["fmt".to_string()], - field_hashes: vec!["field".to_string()], - sdk_type: "rust".to_string(), - service_version: None, - attributes: None, - events: None, - links: None, - resource: None, + table_name: "bench_table".to_string(), + project_id: "bench_project".to_string(), + id: Uuid::new_v4().to_string(), + version: 1, + event_type: "bench_event".to_string(), + timestamp: "2025-03-11T12:00:00Z".to_string(), + trace_id: "trace".to_string(), + span_id: "span".to_string(), + parent_span_id: None, + trace_state: None, + start_time: "2025-03-11T12:00:00Z".to_string(), + end_time: Some("2025-03-11T12:00:01Z".to_string()), + duration_ns: 1_000_000_000, + span_name: "span_name".to_string(), + span_kind: "client".to_string(), + span_type: "bench".to_string(), + status: None, + status_code: 0, + status_message: "OK".to_string(), + severity_text: None, + severity_number: 0, + host: "localhost".to_string(), + url_path: "/".to_string(), + raw_url: "/".to_string(), + method: "GET".to_string(), + referer: "".to_string(), + path_params: None, + query_params: None, + request_headers: None, + response_headers: None, + request_body: None, + response_body: None, + endpoint_hash: "hash".to_string(), + shape_hash: "shape".to_string(), + format_hashes: vec!["fmt".to_string()], + field_hashes: vec!["field".to_string()], + sdk_type: "rust".to_string(), + service_version: None, + attributes: None, + events: None, + links: None, + resource: None, instrumentation_scope: None, - errors: None, - tags: vec!["tag".to_string()], + errors: None, + tags: vec!["tag".to_string()], }); } diff --git a/src/database.rs b/src/database.rs index dc84d53..1a61b35 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,33 +1,34 @@ -use crate::persistent_queue::OtelLogsAndSpans; -use crate::error::{Result, TimeFusionError}; +use std::{any::Any, collections::HashMap, env, fmt, sync::Arc}; + use arrow_schema::SchemaRef; use async_trait::async_trait; -use datafusion::arrow::array::Array; -use datafusion::common::SchemaExt; -use datafusion::common::not_impl_err; -use datafusion::execution::TaskContext; -use datafusion::execution::context::SessionContext; -use datafusion::logical_expr::{Expr, Operator, TableProviderFilterPushDown}; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; -use datafusion::scalar::ScalarValue; use datafusion::{ + arrow::array::Array, catalog::Session, + common::{SchemaExt, not_impl_err}, datasource::{TableProvider, TableType}, error::{DataFusionError, Result as DFResult}, - logical_expr::{BinaryExpr, dml::InsertOp}, - physical_plan::{DisplayFormatType, ExecutionPlan, SendableRecordBatchStream}, + execution::{TaskContext, context::SessionContext}, + logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown, dml::InsertOp}, + physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, + insert::{DataSink, DataSinkExec}, + }, + 
scalar::ScalarValue, }; use delta_kernel::arrow::record_batch::RecordBatch; use deltalake::{DeltaOps, DeltaTable, DeltaTableBuilder, storage::StorageOptions}; use futures::StreamExt; -use std::fmt; -use std::{any::Any, collections::HashMap, env, sync::Arc}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use url::Url; +use crate::{ + error::{Result, TimeFusionError}, + persistent_queue::OtelLogsAndSpans, +}; + type ProjectConfig = (String, StorageOptions, Arc<RwLock<DeltaTable>>); pub type ProjectConfigs = Arc<RwLock<HashMap<String, ProjectConfig>>>; @@ -48,8 +49,7 @@ impl Database { #[tracing::instrument(name = "db.new", skip())] pub async fn new() -> Result<Self> { - let bucket = env::var("AWS_S3_BUCKET") - .map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; + let bucket = env::var("AWS_S3_BUCKET").map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); // Generate a unique prefix for this run's data @@ -57,8 +57,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", bucket, prefix, aws_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&aws_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&aws_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -86,8 +85,7 @@ impl Database { } } - let bucket = env::var("AWS_S3_BUCKET") - .map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; + let bucket = env::var("AWS_S3_BUCKET").map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); // Generate a unique prefix for this run's data @@ -95,8 +93,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", bucket, prefix, aws_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&aws_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&aws_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -107,10 +104,8 @@ impl Database { }; // For tests, pass credentials explicitly - let access_key = env::var("AWS_ACCESS_KEY_ID") - .map_err(|_| TimeFusionError::Config("AWS_ACCESS_KEY_ID not set".to_string()))?; - let secret_key = env::var("AWS_SECRET_ACCESS_KEY") - .map_err(|_| TimeFusionError::Config("AWS_SECRET_ACCESS_KEY not set".to_string()))?; + let access_key = env::var("AWS_ACCESS_KEY_ID").map_err(|_| TimeFusionError::Config("AWS_ACCESS_KEY_ID not set".to_string()))?; + let secret_key = env::var("AWS_SECRET_ACCESS_KEY").map_err(|_| TimeFusionError::Config("AWS_SECRET_ACCESS_KEY not set".to_string()))?; info!("Registering project with explicit credentials"); db.register_project("default", &storage_uri, Some(&access_key), Some(&secret_key), Some(&aws_endpoint)).await?; @@ -121,8 +116,7 @@ impl Database { /// Create and configure a SessionContext with DataFusion settings 
#[tracing::instrument(name = "db.create_session_context", skip(self))] pub fn create_session_context(&self) -> SessionContext { - use datafusion::config::ConfigOptions; - use datafusion::execution::context::SessionContext; + use datafusion::{config::ConfigOptions, execution::context::SessionContext}; let mut options = ConfigOptions::new(); let _ = options.set("datafusion.sql_parser.enable_information_schema", "true"); @@ -149,9 +143,11 @@ impl Database { /// Register PostgreSQL settings table for compatibility #[tracing::instrument(name = "db.register_pg_settings_table", skip(self, ctx))] pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> datafusion::error::Result<()> { - use datafusion::arrow::array::StringArray; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; - use datafusion::arrow::record_batch::RecordBatch; + use datafusion::arrow::{ + array::StringArray, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }; let schema = Arc::new(Schema::new(vec![ Field::new("name", DataType::Utf8, false), @@ -170,9 +166,13 @@ impl Database { /// Register set_config UDF for PostgreSQL compatibility #[tracing::instrument(name = "db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { - use datafusion::arrow::array::{StringArray, StringBuilder}; - use datafusion::arrow::datatypes::DataType; - use datafusion::logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}; + use datafusion::{ + arrow::{ + array::{StringArray, StringBuilder}, + datatypes::DataType, + }, + logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, + }; let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> datafusion::error::Result { let param_value_array = match &args[1] { @@ -212,8 +212,7 @@ impl Database { let pg_service = Arc::new(DfSessionService::new(session_context)); let handler_factory = Arc::new(HandlerFactory(pg_service.clone())); - let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await - .map_err(TimeFusionError::Io)?; + let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await.map_err(TimeFusionError::Io)?; info!("PGWire server running on 0.0.0.0:{}", port); let pgwire_shutdown = shutdown_token.clone(); @@ -285,7 +284,8 @@ impl Database { // Get the table reference for the default project let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; - configs.get("default") + configs + .get("default") .ok_or_else(|| TimeFusionError::Generic(anyhow::anyhow!("Project ID 'default' not found")))? .clone() }; @@ -294,8 +294,7 @@ impl Database { let ops = DeltaOps(table.clone()); let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); - *table = write_op.await - .map_err(TimeFusionError::Database)?; + *table = write_op.await.map_err(TimeFusionError::Database)?; Ok(()) } @@ -340,20 +339,14 @@ impl Database { storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); - let table = match DeltaTableBuilder::from_uri(conn_str) - .with_storage_options(storage_options.0.clone()) - .with_allow_http(true) - .load() - .await - { + let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { Ok(table) => table, Err(err) => { log::warn!("table doesn't exist. creating new table. 
err: {:?}", err); // Create the table with project_id partitioning only for now // Timestamp partitioning is likely causing issues with nanosecond precision - let delta_ops = DeltaOps::try_from_uri(&conn_str).await - .map_err(TimeFusionError::Database)?; + let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; delta_ops .create() .with_columns(OtelLogsAndSpans::columns().unwrap_or_default()) @@ -377,9 +370,7 @@ impl Database { let mut table = table.write().await; // Delta Lake doesn't have an explicit flush method, but we can ensure // the table is up-to-date by loading its latest state - *table = deltalake::open_table(&table.table_uri()) - .await - .map_err(TimeFusionError::Database)?; + *table = deltalake::open_table(&table.table_uri()).await.map_err(TimeFusionError::Database)?; debug!("Flushed pending writes for project: {}", project_id); } Ok(()) @@ -389,8 +380,8 @@ impl Database { #[derive(Debug, Clone)] pub struct ProjectRoutingTable { default_project: String, - database: Arc, - schema: SchemaRef, + database: Arc, + schema: SchemaRef, } impl ProjectRoutingTable { @@ -531,8 +522,7 @@ mod tests { use std::thread::sleep; use chrono::{TimeZone, Utc}; - use datafusion::assert_batches_eq; - use datafusion::prelude::SessionContext; + use datafusion::{assert_batches_eq, prelude::SessionContext}; use dotenv::dotenv; use serial_test::serial; use tokio::time; diff --git a/src/error.rs b/src/error.rs index eb811a9..96f9055 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,9 +1,10 @@ // error.rs -use thiserror::Error; -use datafusion::error::DataFusionError; +use std::io; + use actix_web::error::Error as ActixError; +use datafusion::error::DataFusionError; use deltalake::DeltaTableError; -use std::io; +use thiserror::Error; #[derive(Error, Debug)] pub enum TimeFusionError { @@ -37,4 +38,4 @@ impl actix_web::ResponseError for TimeFusionError { } } -pub type Result = std::result::Result; \ No newline at end of file +pub type Result = std::result::Result; diff --git a/src/lib.rs b/src/lib.rs index 11583f7..c2fcb88 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ // lib.rs - Export modules for use in tests pub mod database; +pub mod error; pub mod persistent_queue; pub mod telemetry; -pub mod error; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 9f3a246..bdce0fb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,23 @@ // src/main.rs -mod telemetry; mod database; -mod persistent_queue; mod error; +mod persistent_queue; +mod telemetry; +use std::{env, sync::Arc}; + use actix_web::{App, HttpResponse, HttpServer, Responder, middleware::Logger, post, web}; use database::Database; use dotenv::dotenv; use futures::TryFutureExt; use serde::Deserialize; -use std::{env, sync::Arc}; -use tokio::time::{Duration, sleep}; +use tokio::{ + sync::mpsc, + time::{Duration, sleep}, +}; use tokio_util::sync::CancellationToken; use tracing::{error, info}; use tracing_actix_web::TracingLogger; -use tokio::sync::mpsc; + use crate::error::{Result, TimeFusionError}; struct AppState { @@ -25,10 +29,10 @@ struct ShutdownSignal; #[derive(Deserialize)] struct RegisterProjectRequest { project_id: String, - bucket: String, + bucket: String, access_key: String, secret_key: String, - endpoint: Option, + endpoint: Option, } #[tracing::instrument( @@ -37,11 +41,9 @@ struct RegisterProjectRequest { fields(project_id = %req.project_id) )] #[post("/register_project")] -async fn register_project( - req: web::Json, - app_state: web::Data, -) -> Result { - 
app_state.db +async fn register_project(req: web::Json, app_state: web::Data) -> Result { + app_state + .db .register_project( &req.project_id, &req.bucket, @@ -50,7 +52,7 @@ async fn register_project( req.endpoint.as_deref(), ) .await?; - + info!("Project registered successfully"); Ok(HttpResponse::Ok().json(serde_json::json!({ "message": format!("Project '{}' registered successfully", req.project_id) @@ -92,13 +94,8 @@ async fn main() -> Result<()> { } }); - let pg_port = env::var("PGWIRE_PORT") - .unwrap_or_else(|_| "5432".to_string()) - .parse::() - .unwrap_or(5432); - let pg_server = db - .start_pgwire_server(session_context.clone(), pg_port, shutdown_token.clone()) - .await?; + let pg_port = env::var("PGWIRE_PORT").unwrap_or_else(|_| "5432".to_string()).parse::().unwrap_or(5432); + let pg_server = db.start_pgwire_server(session_context.clone(), pg_port, shutdown_token.clone()).await?; tokio::time::sleep(Duration::from_secs(1)).await; if pg_server.is_finished() { @@ -106,17 +103,12 @@ async fn main() -> Result<()> { return Err(TimeFusionError::Generic(anyhow::anyhow!("PGWire server failed to start"))); } - let http_addr = format!( - "0.0.0.0:{}", - env::var("PORT").unwrap_or_else(|_| "80".to_string()) - ); + let http_addr = format!("0.0.0.0:{}", env::var("PORT").unwrap_or_else(|_| "80".to_string())); let http_server = HttpServer::new(move || { App::new() .wrap(TracingLogger::default()) .wrap(Logger::default()) - .app_data(web::Data::new(AppState { - db: db.clone(), - })) + .app_data(web::Data::new(AppState { db: db.clone() })) .service(register_project) }); @@ -156,4 +148,4 @@ async fn main() -> Result<()> { info!("Shutdown complete."); Ok(()) -} \ No newline at end of file +} diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs index d9991f9..cc5ed0a 100644 --- a/src/persistent_queue.rs +++ b/src/persistent_queue.rs @@ -1,12 +1,11 @@ use std::sync::Arc; -use arrow_schema::{DataType, TimeUnit}; -use arrow_schema::{Field, Schema, SchemaRef}; +use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; use delta_kernel::schema::StructField; use serde::{Deserialize, Serialize}; -use serde_arrow::schema::SchemaLike; -use serde_arrow::schema::TracingOptions; +use serde_arrow::schema::{SchemaLike, TracingOptions}; use serde_json::json; +use crate::error::{Result, TimeFusionError}; #[allow(non_snake_case)] #[derive(Serialize, Deserialize, Clone, Default)] @@ -14,32 +13,32 @@ pub struct OtelLogsAndSpans { #[serde(with = "chrono::serde::ts_microseconds_option")] pub observed_timestamp: Option>, - pub id: String, - pub parent_id: Option, - pub name: Option, - pub kind: Option, - pub status_code: Option, + pub id: String, + pub parent_id: Option, + pub name: Option, + pub kind: Option, + pub status_code: Option, pub status_message: Option, // Logs specific - pub level: Option, // same as severity text - pub severity___severity_text: Option, + pub level: Option, // same as severity text + pub severity___severity_text: Option, pub severity___severity_number: Option, - pub body: Option, // body as json json + pub body: Option, // body as json pub duration: Option, // nanoseconds #[serde(with = "chrono::serde::ts_microseconds_option")] pub start_time: Option>, #[serde(with = "chrono::serde::ts_microseconds_option")] - pub end_time: Option>, + pub end_time: Option>, // Context - pub context___trace_id: Option, - pub context___span_id: Option, + pub context___trace_id: Option, + pub context___span_id: Option, pub context___trace_state: Option, pub context___trace_flags: Option, - pub 
context___is_remote: Option, + pub context___is_remote: Option, // Events pub events: Option, // events json @@ -51,90 +50,89 @@ pub struct OtelLogsAndSpans { // Server and client pub attributes___client___address: Option, - pub attributes___client___port: Option, + pub attributes___client___port: Option, pub attributes___server___address: Option, - pub attributes___server___port: Option, + pub attributes___server___port: Option, // network https://opentelemetry.io/docs/specs/semconv/attributes-registry/network/ - pub attributes___network___local__address: Option, - pub attributes___network___local__port: Option, - pub attributes___network___peer___address: Option, - pub attributes___network___peer__port: Option, - pub attributes___network___protocol___name: Option, + pub attributes___network___local__address: Option, + pub attributes___network___local__port: Option, + pub attributes___network___peer___address: Option, + pub attributes___network___peer__port: Option, + pub attributes___network___protocol___name: Option, pub attributes___network___protocol___version: Option, - pub attributes___network___transport: Option, - pub attributes___network___type: Option, + pub attributes___network___transport: Option, + pub attributes___network___type: Option, // Source Code Attributes - pub attributes___code___number: Option, - pub attributes___code___file___path: Option, + pub attributes___code___number: Option, + pub attributes___code___file___path: Option, pub attributes___code___function___name: Option, - pub attributes___code___line___number: Option, - pub attributes___code___stacktrace: Option, - + pub attributes___code___line___number: Option, + pub attributes___code___stacktrace: Option, // Log records. https://opentelemetry.io/docs/specs/semconv/general/logs/ pub attributes___log__record___original: Option, - pub attributes___log__record___uid: Option, + pub attributes___log__record___uid: Option, // Exception https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-logs/ - pub attributes___error___type: Option, - pub attributes___exception___type: Option, - pub attributes___exception___message: Option, + pub attributes___error___type: Option, + pub attributes___exception___type: Option, + pub attributes___exception___message: Option, pub attributes___exception___stacktrace: Option, // URL https://opentelemetry.io/docs/specs/semconv/attributes-registry/url/ pub attributes___url___fragment: Option, - pub attributes___url___full: Option, - pub attributes___url___path: Option, - pub attributes___url___query: Option, - pub attributes___url___scheme: Option, + pub attributes___url___full: Option, + pub attributes___url___path: Option, + pub attributes___url___query: Option, + pub attributes___url___scheme: Option, // Useragent https://opentelemetry.io/docs/specs/semconv/attributes-registry/user-agent/ pub attributes___user_agent___original: Option, // HTTP https://opentelemetry.io/docs/specs/semconv/http/http-spans/ - pub attributes___http___request___method: Option, + pub attributes___http___request___method: Option, pub attributes___http___request___method_original: Option, - pub attributes___http___response___status_code: Option, - pub attributes___http___request___resend_count: Option, - pub attributes___http___request___body___size: Option, + pub attributes___http___response___status_code: Option, + pub attributes___http___request___resend_count: Option, + pub attributes___http___request___body___size: Option, // Session 
https://opentelemetry.io/docs/specs/semconv/general/session/ - pub attributes___session___id: Option, + pub attributes___session___id: Option, pub attributes___session___previous___id: Option, // Database https://opentelemetry.io/docs/specs/semconv/database/database-spans/ - pub attributes___db___system___name: Option, - pub attributes___db___collection___name: Option, - pub attributes___db___namespace: Option, - pub attributes___db___operation___name: Option, - pub attributes___db___response___status_code: Option, + pub attributes___db___system___name: Option, + pub attributes___db___collection___name: Option, + pub attributes___db___namespace: Option, + pub attributes___db___operation___name: Option, + pub attributes___db___response___status_code: Option, pub attributes___db___operation___batch___size: Option, - pub attributes___db___query___summary: Option, - pub attributes___db___query___text: Option, + pub attributes___db___query___summary: Option, + pub attributes___db___query___text: Option, // https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ - pub attributes___user___id: Option, - pub attributes___user___email: Option, + pub attributes___user___id: Option, + pub attributes___user___email: Option, pub attributes___user___full_name: Option, - pub attributes___user___name: Option, - pub attributes___user___hash: Option, + pub attributes___user___name: Option, + pub attributes___user___hash: Option, // Resource Attributes (subset) https://opentelemetry.io/docs/specs/semconv/resource/ - pub resource___attributes___service___name: Option, - pub resource___attributes___service___version: Option, + pub resource___attributes___service___name: Option, + pub resource___attributes___service___version: Option, pub resource___attributes___service___instance___id: Option, - pub resource___attributes___service___namespace: Option, + pub resource___attributes___service___namespace: Option, pub resource___attributes___telemetry___sdk___language: Option, - pub resource___attributes___telemetry___sdk___name: Option, - pub resource___attributes___telemetry___sdk___version: Option, + pub resource___attributes___telemetry___sdk___name: Option, + pub resource___attributes___telemetry___sdk___version: Option, pub resource___attributes___user_agent___original: Option, // Kept at the bottom to make delta-rs happy, so its schema matches datafusion. // Seems delta removes the partition ids from the normal schema and moves them to the end. // Top-level fields - pub project_id: String, + pub project_id: String, #[serde(with = "chrono::serde::ts_microseconds")] pub timestamp: chrono::DateTime, @@ -144,31 +142,53 @@ impl OtelLogsAndSpans { pub fn table_name() -> String { "otel_logs_and_spans".to_string() } - pub fn columns() -> anyhow::Result> { + + pub fn columns() -> Result> { + // Use custom Result let tracing_options = TracingOptions::default() - .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false}))? + .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false})) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite project_id: {}", e)))? .overwrite( "timestamp", json!({"name": "timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": false}), - )? - .overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false}))? + ) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite timestamp: {}", e)))? 
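            // Each .overwrite() below pins the exact Arrow type for a field whose
            // serialized form serde_arrow would otherwise trace differently: Utf8 for
            // the non-nullable id/project_id keys, microsecond-precision timestamps
            // for the time fields.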
+ .overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false})) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite id: {}", e)))? .overwrite( "observed_timestamp", json!({"name": "observed_timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - )? + ) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite observed_timestamp: {}", e)))? .overwrite( "start_time", json!({"name": "start_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - )? + ) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite start_time: {}", e)))? .overwrite( "end_time", json!({"name": "end_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - )?; + ) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite end_time: {}", e)))?; + + let fields = Vec::::from_type::(tracing_options) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to generate fields: {}", e)))?; + let vec_refs: Vec = fields + .iter() + .map(|arc_field| arc_field.as_ref().try_into()) + .collect::, _>>() + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert fields to StructField: {}", e)))?; + + // Validate the last two fields as a sanity check + if fields.len() < 2 + || fields[fields.len() - 2].data_type() != &DataType::Utf8 + || fields[fields.len() - 1].data_type() != &DataType::Timestamp(TimeUnit::Microsecond, None) + { + return Err(TimeFusionError::Generic(anyhow::anyhow!( + "Schema validation failed: expected project_id (Utf8) and timestamp (Timestamp) at end" + ))); + } - let fields = Vec::::from_type::(tracing_options)?; - let vec_refs: Vec = fields.iter().map(|arc_field| arc_field.as_ref().try_into().unwrap()).collect(); - assert_eq!(fields[fields.len() - 2].data_type(), &DataType::Utf8); - assert_eq!(fields[fields.len() - 1].data_type(), &DataType::Timestamp(TimeUnit::Microsecond, None)); Ok(vec_refs) } diff --git a/src/telemetry.rs b/src/telemetry.rs index 028c8d2..5f69887 100644 --- a/src/telemetry.rs +++ b/src/telemetry.rs @@ -1,24 +1,21 @@ // telemetry.rs +use std::{env, time::Duration}; + +use opentelemetry::{KeyValue, trace::TracerProvider}; +use opentelemetry_otlp::{Protocol, WithExportConfig}; use opentelemetry_sdk::{ - trace::SdkTracerProvider, Resource, + metrics::{PeriodicReader, SdkMeterProvider}, + trace::SdkTracerProvider, }; -use opentelemetry::trace::TracerProvider; -use opentelemetry_otlp::{Protocol, WithExportConfig}; -use std::{env, time::Duration}; -use opentelemetry::KeyValue; -use tracing_subscriber::{fmt, layer::SubscriberExt, Registry}; -use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; +use tracing_subscriber::{Registry, fmt, layer::SubscriberExt}; pub fn init_telemetry() { // Read configuration from environment variables. 
- let otlp_trace_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT") - .unwrap_or_else(|_| "http://otelcol.apitoolkit.io:4317".into()); - let otlp_metrics_endpoint = env::var("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") - .unwrap_or_else(|_| otlp_trace_endpoint.clone()); - let service_name = env::var("OTEL_SERVICE_NAME") - .unwrap_or_else(|_| "timefusion".into()); + let otlp_trace_endpoint = env::var("OTEL_EXPORTER_OTLP_ENDPOINT").unwrap_or_else(|_| "http://otelcol.apitoolkit.io:4317".into()); + let otlp_metrics_endpoint = env::var("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT").unwrap_or_else(|_| otlp_trace_endpoint.clone()); + let service_name = env::var("OTEL_SERVICE_NAME").unwrap_or_else(|_| "timefusion".into()); let resource_attrs = env::var("OTEL_RESOURCE_ATTRIBUTES").unwrap_or_default(); // Build resource using the public builder API. @@ -37,10 +34,7 @@ pub fn init_telemetry() { .build() .expect("Failed to create OTLP trace exporter"); - let tracer_provider = SdkTracerProvider::builder() - .with_batch_exporter(trace_exporter) - .with_resource(resource.clone()) - .build(); + let tracer_provider = SdkTracerProvider::builder().with_batch_exporter(trace_exporter).with_resource(resource.clone()).build(); let sdk_tracer = tracer_provider.tracer("timefusion_tracer"); @@ -48,12 +42,8 @@ pub fn init_telemetry() { let otel_layer = tracing_opentelemetry::layer().with_tracer(sdk_tracer); let fmt_layer = fmt::layer(); - let subscriber = Registry::default() - .with(tracing_subscriber::EnvFilter::from_default_env()) - .with(fmt_layer) - .with(otel_layer); - tracing::subscriber::set_global_default(subscriber) - .expect("Failed to set global tracing subscriber"); + let subscriber = Registry::default().with(tracing_subscriber::EnvFilter::from_default_env()).with(fmt_layer).with(otel_layer); + tracing::subscriber::set_global_default(subscriber).expect("Failed to set global tracing subscriber"); // --- Setup OTLP Metrics --- let metric_exporter = opentelemetry_otlp::MetricExporter::builder() @@ -63,14 +53,9 @@ pub fn init_telemetry() { .build() .expect("Failed to create OTLP metric exporter"); - let reader = PeriodicReader::builder(metric_exporter) - .with_interval(Duration::from_secs(60)) - .build(); + let reader = PeriodicReader::builder(metric_exporter).with_interval(Duration::from_secs(60)).build(); - let meter_provider = SdkMeterProvider::builder() - .with_reader(reader) - .with_resource(resource) - .build(); + let meter_provider = SdkMeterProvider::builder().with_reader(reader).with_resource(resource).build(); opentelemetry::global::set_meter_provider(meter_provider); } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 21b950e..91ad788 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,13 +1,16 @@ #[cfg(test)] mod integration { + use std::{ + collections::HashSet, + sync::{Arc, Mutex}, + time::{Duration, Instant}, + }; + use anyhow::Result; use dotenv::dotenv; use rand::Rng; use scopeguard; use serial_test::serial; - use std::collections::HashSet; - use std::sync::{Arc, Mutex}; - use std::time::{Duration, Instant}; use timefusion::database::Database; use tokio::{sync::Notify, time::sleep}; use tokio_postgres::{Client, NoTls}; diff --git a/tests/sqllogictest.rs b/tests/sqllogictest.rs index e7832e1..ebade9d 100644 --- a/tests/sqllogictest.rs +++ b/tests/sqllogictest.rs @@ -1,15 +1,16 @@ #[cfg(test)] mod sqllogictest_tests { - use anyhow::Result; - use async_trait::async_trait; - use dotenv::dotenv; - use serial_test::serial; - use sqllogictest::{AsyncDB, DBOutput, 
DefaultColumnType}; use std::{ path::Path, sync::Arc, time::{Duration, Instant}, }; + + use anyhow::Result; + use async_trait::async_trait; + use dotenv::dotenv; + use serial_test::serial; + use sqllogictest::{AsyncDB, DBOutput, DefaultColumnType}; use timefusion::database::Database; use tokio::{sync::Notify, time::sleep}; use tokio_postgres::{NoTls, Row}; From 9288dffa66ea120fe2d573ce754d16797a65956c Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 12:32:31 +0100 Subject: [PATCH 06/14] refactor: Make AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY required in Config - Updated Config to use String instead of Option for aws_access_key_id and aws_secret_access_key - Modified Config::from_env() to fail if AWS credentials are missing - Updated Database::new() to pass required credentials to register_project() - Simplified Database::new_for_test() to use required config credentials --- src/config.rs | 57 +++++++++++++++++++++++ src/database.rs | 100 ++++++++++++---------------------------- src/lib.rs | 1 + src/main.rs | 19 +++++--- src/persistent_queue.rs | 1 + 5 files changed, 101 insertions(+), 77 deletions(-) create mode 100644 src/config.rs diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..d8d3d52 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,57 @@ +// src/config.rs +use std::env; + +use crate::error::{Result, TimeFusionError}; + +#[derive(Clone)] +pub struct Config { + pub pg_port: u16, + pub http_port: String, + pub s3_bucket: String, + pub s3_endpoint: String, + pub table_prefix: String, + pub aws_access_key_id: String, + pub aws_secret_access_key: String, +} + +impl Config { + pub fn from_env() -> Result { + let pg_port = env::var("PGWIRE_PORT") + .unwrap_or_else(|_| "5432".to_string()) + .parse::() + .map_err(|e| TimeFusionError::Config(format!("Invalid PGWIRE_PORT: {}", e)))?; + + let http_port = env::var("PORT").unwrap_or_else(|_| "80".to_string()); + if http_port.parse::().is_err() { + return Err(TimeFusionError::Config(format!("Invalid PORT: {} must be a valid u16", http_port))); + } + + let s3_bucket = env::var("AWS_S3_BUCKET").map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; + + let s3_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); + if s3_endpoint.is_empty() || (!s3_endpoint.starts_with("http://") && !s3_endpoint.starts_with("https://")) { + return Err(TimeFusionError::Config(format!("Invalid AWS_S3_ENDPOINT: {} must be a valid URL", s3_endpoint))); + } + + let table_prefix = env::var("TIMEFUSION_TABLE_PREFIX").unwrap_or_else(|_| "timefusion".to_string()); + if table_prefix.is_empty() { + return Err(TimeFusionError::Config("TIMEFUSION_TABLE_PREFIX cannot be empty".to_string())); + } + + // Load AWS credentials, required + let aws_access_key_id = + env::var("AWS_ACCESS_KEY_ID").map_err(|_| TimeFusionError::Config("AWS_ACCESS_KEY_ID environment variable not set".to_string()))?; + let aws_secret_access_key = + env::var("AWS_SECRET_ACCESS_KEY").map_err(|_| TimeFusionError::Config("AWS_SECRET_ACCESS_KEY environment variable not set".to_string()))?; + + Ok(Config { + pg_port, + http_port, + s3_bucket, + s3_endpoint, + table_prefix, + aws_access_key_id, + aws_secret_access_key, + }) + } +} diff --git a/src/database.rs b/src/database.rs index 1a61b35..1e68f07 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,4 +1,4 @@ -use std::{any::Any, collections::HashMap, env, fmt, sync::Arc}; +use std::{any::Any, collections::HashMap, fmt, 
sync::Arc}; use arrow_schema::SchemaRef; use async_trait::async_trait; @@ -25,6 +25,7 @@ use tracing::{debug, error, info}; use url::Url; use crate::{ + config::Config, error::{Result, TimeFusionError}, persistent_queue::OtelLogsAndSpans, }; @@ -47,17 +48,12 @@ impl Clone for Database { } impl Database { - #[tracing::instrument(name = "db.new", skip())] - pub async fn new() -> Result { - let bucket = env::var("AWS_S3_BUCKET").map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; - let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); - - // Generate a unique prefix for this run's data - let prefix = env::var("TIMEFUSION_TABLE_PREFIX").unwrap_or_else(|_| "timefusion".to_string()); - let storage_uri = format!("s3://{}/{}/?endpoint={}", bucket, prefix, aws_endpoint); + #[tracing::instrument(name = "db.new", skip(config))] + pub async fn new(config: &Config) -> Result { + let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&aws_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -67,33 +63,28 @@ impl Database { project_configs: Arc::new(RwLock::new(project_configs)), }; - db.register_project("default", &storage_uri, None, None, None).await?; + // Pass credentials to register_project since they're required + db.register_project( + "default", + &storage_uri, + Some(&config.aws_access_key_id), + Some(&config.aws_secret_access_key), + Some(&config.s3_endpoint), + ) + .await?; Ok(db) } #[cfg(test)] - #[tracing::instrument(name = "db.new_for_test", skip())] - pub async fn new_for_test() -> Result { - // For tests, we directly configure all AWS env vars + #[tracing::instrument(name = "db.new_for_test", skip(config))] + pub async fn new_for_test(config: &Config) -> Result { info!("Starting Database in test mode"); - // Show all environment variables for debugging - for (key, value) in env::vars() { - if key.starts_with("AWS_") { - info!("ENV: {}={}", key, value); - } - } - - let bucket = env::var("AWS_S3_BUCKET").map_err(|_| TimeFusionError::Config("AWS_S3_BUCKET environment variable not set".to_string()))?; - let aws_endpoint = env::var("AWS_S3_ENDPOINT").unwrap_or_else(|_| "https://s3.amazonaws.com".to_string()); - - // Generate a unique prefix for this run's data - let prefix = env::var("TIMEFUSION_TABLE_PREFIX").unwrap_or_else(|_| "timefusion".to_string()); - let storage_uri = format!("s3://{}/{}/?endpoint={}", bucket, prefix, aws_endpoint); + let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&aws_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -103,32 +94,32 @@ impl Database { project_configs: Arc::new(RwLock::new(project_configs)), }; - // For tests, pass credentials explicitly - 
let access_key = env::var("AWS_ACCESS_KEY_ID").map_err(|_| TimeFusionError::Config("AWS_ACCESS_KEY_ID not set".to_string()))?; - let secret_key = env::var("AWS_SECRET_ACCESS_KEY").map_err(|_| TimeFusionError::Config("AWS_SECRET_ACCESS_KEY not set".to_string()))?; - info!("Registering project with explicit credentials"); - db.register_project("default", &storage_uri, Some(&access_key), Some(&secret_key), Some(&aws_endpoint)).await?; + db.register_project( + "default", + &storage_uri, + Some(&config.aws_access_key_id), + Some(&config.aws_secret_access_key), + Some(&config.s3_endpoint), + ) + .await?; Ok(db) } - /// Create and configure a SessionContext with DataFusion settings #[tracing::instrument(name = "db.create_session_context", skip(self))] pub fn create_session_context(&self) -> SessionContext { - use datafusion::{config::ConfigOptions, execution::context::SessionContext}; + use datafusion::config::ConfigOptions; let mut options = ConfigOptions::new(); let _ = options.set("datafusion.sql_parser.enable_information_schema", "true"); SessionContext::new_with_config(options.into()) } - /// Setup the session context with tables and register DataFusion tables #[tracing::instrument(name = "db.setup_session_context", skip(self, ctx))] pub fn setup_session_context(&self, ctx: &SessionContext) -> DFResult<()> { use crate::persistent_queue::OtelLogsAndSpans; - // Create tables and register them with session context let schema = OtelLogsAndSpans::schema_ref(); let routing_table = ProjectRoutingTable::new("default".to_string(), Arc::new(self.clone()), schema); ctx.register_table(OtelLogsAndSpans::table_name(), Arc::new(routing_table))?; @@ -140,9 +131,8 @@ impl Database { Ok(()) } - /// Register PostgreSQL settings table for compatibility #[tracing::instrument(name = "db.register_pg_settings_table", skip(self, ctx))] - pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> datafusion::error::Result<()> { + pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> DFResult<()> { use datafusion::arrow::{ array::StringArray, datatypes::{DataType, Field, Schema}, @@ -163,7 +153,6 @@ impl Database { Ok(()) } - /// Register set_config UDF for PostgreSQL compatibility #[tracing::instrument(name = "db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { use datafusion::{ @@ -174,7 +163,7 @@ impl Database { logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, }; - let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> datafusion::error::Result { + let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> DFResult { let param_value_array = match &args[1] { ColumnarValue::Array(array) => array.as_any().downcast_ref::().expect("set_config second arg must be a StringArray"), _ => panic!("set_config second arg must be an array"), @@ -202,7 +191,6 @@ impl Database { ctx.register_udf(set_config_udf); } - /// Start a PGWire server with the given session context #[tracing::instrument(name = "db.start_pgwire_server", skip(self, session_context, shutdown_token), fields(port))] pub async fn start_pgwire_server( &self, session_context: SessionContext, port: u16, shutdown_token: CancellationToken, @@ -259,12 +247,10 @@ impl Database { pub async fn resolve_table(&self, project_id: &str) -> DFResult>> { let project_configs = self.project_configs.read().await; - // Try to get the requested project table first if let Some((_, _, table)) = 
project_configs.get(project_id) { return Ok(table.clone()); } - // If not found and project_id is not "default", try the default table if project_id != "default" { if let Some((_, _, table)) = project_configs.get("default") { log::warn!("Project '{}' not found, falling back to default project", project_id); @@ -272,7 +258,6 @@ impl Database { } } - // If we get here, neither the requested project nor default exists Err(DataFusionError::Execution(format!( "Unknown project_id: {} and no default project found", project_id @@ -281,7 +266,6 @@ impl Database { #[tracing::instrument(name = "db.insert_records_batch", skip(self, _table, batch), fields(batch_size = batch.len()))] pub async fn insert_records_batch(&self, _table: &str, batch: Vec) -> Result<()> { - // Get the table reference for the default project let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; configs @@ -302,20 +286,13 @@ impl Database { #[cfg(test)] #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec) -> Result<()> { - // TODO: insert records doesn't need to accept a project_id as they can be read from the - // record. - // Records should be grouped by span, and separated into groups then inserted into the - // correct table. - use serde_arrow::schema::SchemaLike; - // Convert OtelLogsAndSpans records to Arrow RecordBatch format let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields: {}", e)))?; let batch = serde_arrow::to_record_batch(&fields, &records) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert to record batch: {}", e)))?; - // Call insert_records_batch with the converted batch to reuse common insertion logic self.insert_records_batch("default", vec![batch]).await } @@ -344,8 +321,6 @@ impl Database { Err(err) => { log::warn!("table doesn't exist. creating new table. 
err: {:?}", err); - // Create the table with project_id partitioning only for now - // Timestamp partitioning is likely causing issues with nanosecond precision let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; delta_ops .create() @@ -362,14 +337,11 @@ impl Database { Ok(()) } - /// Flushes any pending writes to Delta Lake for all projects #[tracing::instrument(name = "db.flush_pending_writes", skip(self))] pub async fn flush_pending_writes(&self) -> Result<()> { let configs = self.project_configs.read().await; for (project_id, (_, _, table)) in configs.iter() { let mut table = table.write().await; - // Delta Lake doesn't have an explicit flush method, but we can ensure - // the table is up-to-date by loading its latest state *table = deltalake::open_table(&table.table_uri()).await.map_err(TimeFusionError::Database)?; debug!("Flushed pending writes for project: {}", project_id); } @@ -394,7 +366,6 @@ impl ProjectRoutingTable { } fn extract_project_id_from_filters(&self, filters: &[Expr]) -> Option { - // Look for expressions like "project_id = 'some_value'" for filter in filters { if let Some(project_id) = self.extract_project_id(filter) { return Some(project_id); @@ -409,24 +380,17 @@ impl ProjectRoutingTable { fn extract_project_id(&self, expr: &Expr) -> Option { match expr { - // Binary expression: "project_id = 'value'" Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - // Check if this is an equality operation if *op == Operator::Eq { - // Check if left side is a column reference to "project_id" if let Expr::Column(col) = left.as_ref() { if col.name == "project_id" { - // Check if right side is a literal string if let Expr::Literal(ScalarValue::Utf8(Some(value))) = right.as_ref() { return Some(value.clone()); } } } - - // Also check if right side is the column (order might be flipped) if let Expr::Column(col) = right.as_ref() { if col.name == "project_id" { - // Check if left side is a literal string if let Expr::Literal(ScalarValue::Utf8(Some(value))) = left.as_ref() { return Some(value.clone()); } @@ -441,7 +405,6 @@ impl ProjectRoutingTable { } } -// Needed by DataSink impl DisplayAs for ProjectRoutingTable { fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { @@ -492,8 +455,6 @@ impl TableProvider for ProjectRoutingTable { } async fn insert_into(&self, _state: &dyn Session, input: Arc, insert_op: InsertOp) -> DFResult> { - // Create a physical plan from the logical plan. - // Check that the schema of the plan matches the schema of this table. 
self.schema().logically_equivalent_names_and_types(&input.schema())?; if insert_op != InsertOp::Append { @@ -508,7 +469,6 @@ impl TableProvider for ProjectRoutingTable { } async fn scan(&self, state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option) -> DFResult> { - // Get project_id from filters if possible, otherwise use default let project_id = self.extract_project_id_from_filters(filters).unwrap_or_else(|| self.default_project.clone()); let delta_table = self.database.resolve_table(&project_id).await?; diff --git a/src/lib.rs b/src/lib.rs index c2fcb88..9971cc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ // lib.rs - Export modules for use in tests +pub mod config; pub mod database; pub mod error; pub mod persistent_queue; diff --git a/src/main.rs b/src/main.rs index bdce0fb..fffbee3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,10 @@ // src/main.rs +mod config; mod database; mod error; mod persistent_queue; mod telemetry; -use std::{env, sync::Arc}; +use std::sync::Arc; use actix_web::{App, HttpResponse, HttpServer, Responder, middleware::Logger, post, web}; use database::Database; @@ -18,7 +19,10 @@ use tokio_util::sync::CancellationToken; use tracing::{error, info}; use tracing_actix_web::TracingLogger; -use crate::error::{Result, TimeFusionError}; +use crate::{ + config::Config, + error::{Result, TimeFusionError}, +}; struct AppState { db: Arc, @@ -63,12 +67,15 @@ async fn register_project(req: web::Json, app_state: web async fn main() -> Result<()> { dotenv().ok(); + // Load configuration once at startup + let config = Config::from_env()?; + // Initialize tracing & metrics telemetry::init_telemetry(); info!("Starting TimeFusion application"); - let db = Database::new().await?; + let db = Database::new(&config).await?; // Pass &config here info!("Database initialized successfully"); let session_context = db.create_session_context(); @@ -80,7 +87,6 @@ async fn main() -> Result<()> { let shutdown_token = CancellationToken::new(); let http_shutdown = shutdown_token.clone(); - // Spawn database write monitor let db_clone = db.clone(); let shutdown_monitor = shutdown_token.clone(); tokio::spawn(async move { @@ -94,8 +100,7 @@ async fn main() -> Result<()> { } }); - let pg_port = env::var("PGWIRE_PORT").unwrap_or_else(|_| "5432".to_string()).parse::().unwrap_or(5432); - let pg_server = db.start_pgwire_server(session_context.clone(), pg_port, shutdown_token.clone()).await?; + let pg_server = db.start_pgwire_server(session_context.clone(), config.pg_port, shutdown_token.clone()).await?; tokio::time::sleep(Duration::from_secs(1)).await; if pg_server.is_finished() { @@ -103,7 +108,7 @@ async fn main() -> Result<()> { return Err(TimeFusionError::Generic(anyhow::anyhow!("PGWire server failed to start"))); } - let http_addr = format!("0.0.0.0:{}", env::var("PORT").unwrap_or_else(|_| "80".to_string())); + let http_addr = format!("0.0.0.0:{}", config.http_port); let http_server = HttpServer::new(move || { App::new() .wrap(TracingLogger::default()) diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs index cc5ed0a..06b93f0 100644 --- a/src/persistent_queue.rs +++ b/src/persistent_queue.rs @@ -5,6 +5,7 @@ use delta_kernel::schema::StructField; use serde::{Deserialize, Serialize}; use serde_arrow::schema::{SchemaLike, TracingOptions}; use serde_json::json; + use crate::error::{Result, TimeFusionError}; #[allow(non_snake_case)] From 9ad7e5755ba1c09b9d93bcace15d4f571e37c389 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Thu, 10 Apr 2025 
14:15:24 +0100
Subject: [PATCH 07/14] feat: Add validation to insert_records_batch

- Updated insert_records_batch to deserialize RecordBatches into OtelLogsAndSpans
- Added validation for each record using OtelLogsAndSpans::validate()
- Ensured write operation only proceeds if all records are valid
- Included serde_arrow::SchemaLike for deserialization support
---
 .DS_Store                 | Bin 6148 -> 6148 bytes
 src/database.rs           | 134 +++++++++++++++++++------------
 src/error.rs              |   3 +
 src/persistent_queue.rs   | 163 +++++++++++++++++++++++++++++---------
 tests/integration_test.rs |  63 ++-------------
 tests/sqllogictest.rs     |   9 +--
 6 files changed, 219 insertions(+), 153 deletions(-)

diff --git a/.DS_Store b/.DS_Store
index 4a5f99c0a1f0802473e21255ae035a25a7657230..72a570c9757ccfd52e8503bb2c2218776af0d30d 100644
[GIT binary patch for .DS_Store omitted: delta 505, delta 235]

diff --git a/src/database.rs b/src/database.rs
 pub type ProjectConfigs = Arc>>;
@@ -53,7 +52,8 @@ impl Database {
         let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint);
         info!("Storage URI configured: {}", storage_uri);

-        let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?;
+        let aws_url = Url::parse(&config.s3_endpoint)
+            .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?;
         deltalake::aws::register_handlers(Some(aws_url));
         info!("AWS handlers registered");
@@ -63,15 +63,13 @@ impl Database {
             project_configs: Arc::new(RwLock::new(project_configs)),
         };

-        // Pass credentials to register_project since they're required
         db.register_project(
             "default",
             &storage_uri,
             Some(&config.aws_access_key_id),
             Some(&config.aws_secret_access_key),
             Some(&config.s3_endpoint),
-        )
-        .await?;
+        ).await?;

         Ok(db)
     }
@@ -84,7 +82,8 @@ impl Database {
         let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint);
         info!("Storage URI configured: {}", storage_uri);

-        let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?;
+        let aws_url = Url::parse(&config.s3_endpoint)
+            .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?;
         deltalake::aws::register_handlers(Some(aws_url));
         info!("AWS handlers registered");
@@ -101,8 +100,7 @@
             Some(&config.aws_access_key_id),
             Some(&config.aws_secret_access_key),
             Some(&config.s3_endpoint),
-        )
-        .await?;
+        ).await?;

         Ok(db)
     }
@@ -133,11 +131,9 @@
     #[tracing::instrument(name = "db.register_pg_settings_table", skip(self, ctx))]
     pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> DFResult<()> {
-        use datafusion::arrow::{
-            array::StringArray,
-            datatypes::{DataType, Field, Schema},
-            record_batch::RecordBatch,
-        };
+        use datafusion::arrow::array::StringArray;
+        use datafusion::arrow::datatypes::{DataType, Field, Schema};
+        use datafusion::arrow::record_batch::RecordBatch;

         let schema = Arc::new(Schema::new(vec![
             Field::new("name", DataType::Utf8, false),
@@ -155,13 +151,9 @@
     #[tracing::instrument(name =
"db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { - use datafusion::{ - arrow::{ - array::{StringArray, StringBuilder}, - datatypes::DataType, - }, - logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, - }; + use datafusion::arrow::array::{StringArray, StringBuilder}; + use datafusion::arrow::datatypes::DataType; + use datafusion::logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}; let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> DFResult { let param_value_array = match &args[1] { @@ -200,7 +192,8 @@ impl Database { let pg_service = Arc::new(DfSessionService::new(session_context)); let handler_factory = Arc::new(HandlerFactory(pg_service.clone())); - let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await.map_err(TimeFusionError::Io)?; + let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await + .map_err(TimeFusionError::Io)?; info!("PGWire server running on 0.0.0.0:{}", port); let pgwire_shutdown = shutdown_token.clone(); @@ -266,10 +259,18 @@ impl Database { #[tracing::instrument(name = "db.insert_records_batch", skip(self, _table, batch), fields(batch_size = batch.len()))] pub async fn insert_records_batch(&self, _table: &str, batch: Vec) -> Result<()> { + for record_batch in &batch { + let records: Vec = serde_arrow::from_record_batch(record_batch) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to deserialize record batch: {}", e)))?; + + for record in records { + record.validate()?; + } + } + let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; - configs - .get("default") + configs.get("default") .ok_or_else(|| TimeFusionError::Generic(anyhow::anyhow!("Project ID 'default' not found")))? .clone() }; @@ -278,7 +279,8 @@ impl Database { let ops = DeltaOps(table.clone()); let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); - *table = write_op.await.map_err(TimeFusionError::Database)?; + *table = write_op.await + .map_err(TimeFusionError::Database)?; Ok(()) } @@ -286,7 +288,11 @@ impl Database { #[cfg(test)] #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec) -> Result<()> { - use serde_arrow::schema::SchemaLike; + use serde_arrow::schema::SchemaLike; // Import here for from_type + + for record in records { + record.validate()?; + } let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields: {}", e)))?; @@ -316,12 +322,18 @@ impl Database { storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); - let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { + let table = match DeltaTableBuilder::from_uri(conn_str) + .with_storage_options(storage_options.0.clone()) + .with_allow_http(true) + .load() + .await + { Ok(table) => table, Err(err) => { log::warn!("table doesn't exist. creating new table. 
err: {:?}", err); - let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; + let delta_ops = DeltaOps::try_from_uri(&conn_str).await + .map_err(TimeFusionError::Database)?; delta_ops .create() .with_columns(OtelLogsAndSpans::columns().unwrap_or_default()) @@ -342,7 +354,9 @@ impl Database { let configs = self.project_configs.read().await; for (project_id, (_, _, table)) in configs.iter() { let mut table = table.write().await; - *table = deltalake::open_table(&table.table_uri()).await.map_err(TimeFusionError::Database)?; + *table = deltalake::open_table(&table.table_uri()) + .await + .map_err(TimeFusionError::Database)?; debug!("Flushed pending writes for project: {}", project_id); } Ok(()) @@ -352,8 +366,8 @@ impl Database { #[derive(Debug, Clone)] pub struct ProjectRoutingTable { default_project: String, - database: Arc, - schema: SchemaRef, + database: Arc, + schema: SchemaRef, } impl ProjectRoutingTable { @@ -495,13 +509,15 @@ mod tests { let _ = env_logger::builder().is_test(true).try_init(); dotenv().ok(); + // Load the base config from environment variables + let mut config = Config::from_env()?; + // Set a unique test-specific prefix for a clean Delta table let test_prefix = format!("test-data-{}", prefix); - unsafe { - env::set_var("TIMEFUSION_TABLE_PREFIX", &test_prefix); - } + config.table_prefix = test_prefix.clone(); // Override the table_prefix - let db = Database::new().await?; + // Create the database with the modified config + let db = Database::new(&config).await?; let mut session_context = SessionContext::new(); datafusion_functions_json::register_all(&session_context)?; let schema = OtelLogsAndSpans::schema_ref(); @@ -751,6 +767,24 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_insert_records_batch_validation() { + let config = Config::from_env().unwrap(); + let db = Database::new_for_test(&config).await.unwrap(); + + let invalid_record = OtelLogsAndSpans { + id: "".to_string(), // Invalid: empty id + project_id: "test_proj".to_string(), + timestamp: Utc::now(), + ..Default::default() + }; + let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()).unwrap(); + let batch = serde_arrow::to_record_batch(&fields, &vec![invalid_record]).unwrap(); + + let result = db.insert_records_batch("default", vec![batch]).await; + assert!(matches!(result, Err(TimeFusionError::Validation(msg)) if msg == "id must not be empty")); + } + #[serial] #[tokio::test] async fn test_sql_insert() -> Result<()> { diff --git a/src/error.rs b/src/error.rs index 96f9055..eebf0aa 100644 --- a/src/error.rs +++ b/src/error.rs @@ -25,6 +25,9 @@ pub enum TimeFusionError { #[error("Generic error: {0}")] Generic(#[from] anyhow::Error), + + #[error("Validation error: {0}")] + Validation(String), } impl actix_web::ResponseError for TimeFusionError { diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs index 06b93f0..566da0b 100644 --- a/src/persistent_queue.rs +++ b/src/persistent_queue.rs @@ -1,6 +1,7 @@ use std::sync::Arc; -use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use arrow_schema::{DataType, Schema, SchemaRef, TimeUnit}; // Removed Field +use chrono::{DateTime, Utc}; use delta_kernel::schema::StructField; use serde::{Deserialize, Serialize}; use serde_arrow::schema::{SchemaLike, TracingOptions}; @@ -21,41 +22,32 @@ pub struct OtelLogsAndSpans { pub status_code: Option, pub status_message: Option, - // Logs specific - pub level: Option, // same as severity text + pub level: Option, pub 
severity___severity_text: Option, pub severity___severity_number: Option, - pub body: Option, // body as json + pub body: Option, - pub duration: Option, // nanoseconds + pub duration: Option, #[serde(with = "chrono::serde::ts_microseconds_option")] pub start_time: Option>, #[serde(with = "chrono::serde::ts_microseconds_option")] pub end_time: Option>, - // Context pub context___trace_id: Option, pub context___span_id: Option, pub context___trace_state: Option, pub context___trace_flags: Option, pub context___is_remote: Option, - // Events - pub events: Option, // events json + pub events: Option, + pub links: Option, - // Links - pub links: Option, // links json - - // Attributes - - // Server and client pub attributes___client___address: Option, pub attributes___client___port: Option, pub attributes___server___address: Option, pub attributes___server___port: Option, - // network https://opentelemetry.io/docs/specs/semconv/attributes-registry/network/ pub attributes___network___local__address: Option, pub attributes___network___local__port: Option, pub attributes___network___peer___address: Option, @@ -65,44 +57,37 @@ pub struct OtelLogsAndSpans { pub attributes___network___transport: Option, pub attributes___network___type: Option, - // Source Code Attributes pub attributes___code___number: Option, - pub attributes___code___file___path: Option, - pub attributes___code___function___name: Option, + pub attributes___code___file___path: Option, + pub attributes___code___function___name: Option, pub attributes___code___line___number: Option, - pub attributes___code___stacktrace: Option, - // Log records. https://opentelemetry.io/docs/specs/semconv/general/logs/ + pub attributes___code___stacktrace: Option, + pub attributes___log__record___original: Option, pub attributes___log__record___uid: Option, - // Exception https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-logs/ pub attributes___error___type: Option, pub attributes___exception___type: Option, pub attributes___exception___message: Option, pub attributes___exception___stacktrace: Option, - // URL https://opentelemetry.io/docs/specs/semconv/attributes-registry/url/ pub attributes___url___fragment: Option, pub attributes___url___full: Option, pub attributes___url___path: Option, pub attributes___url___query: Option, pub attributes___url___scheme: Option, - // Useragent https://opentelemetry.io/docs/specs/semconv/attributes-registry/user-agent/ pub attributes___user_agent___original: Option, - // HTTP https://opentelemetry.io/docs/specs/semconv/http/http-spans/ pub attributes___http___request___method: Option, pub attributes___http___request___method_original: Option, pub attributes___http___response___status_code: Option, pub attributes___http___request___resend_count: Option, pub attributes___http___request___body___size: Option, - // Session https://opentelemetry.io/docs/specs/semconv/general/session/ pub attributes___session___id: Option, pub attributes___session___previous___id: Option, - // Database https://opentelemetry.io/docs/specs/semconv/database/database-spans/ pub attributes___db___system___name: Option, pub attributes___db___collection___name: Option, pub attributes___db___namespace: Option, @@ -112,14 +97,12 @@ pub struct OtelLogsAndSpans { pub attributes___db___query___summary: Option, pub attributes___db___query___text: Option, - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ pub attributes___user___id: Option, pub attributes___user___email: Option, pub 
attributes___user___full_name: Option, pub attributes___user___name: Option, pub attributes___user___hash: Option, - // Resource Attributes (subset) https://opentelemetry.io/docs/specs/semconv/resource/ pub resource___attributes___service___name: Option, pub resource___attributes___service___version: Option, pub resource___attributes___service___instance___id: Option, @@ -130,10 +113,8 @@ pub struct OtelLogsAndSpans { pub resource___attributes___telemetry___sdk___version: Option, pub resource___attributes___user_agent___original: Option, - // Kept at the bottom to make delta-rs happy, so its schema matches datafusion. - // Seems delta removes the partition ids from the normal schema and moves them to the end. - // Top-level fields - pub project_id: String, + + pub project_id: String, #[serde(with = "chrono::serde::ts_microseconds")] pub timestamp: chrono::DateTime, @@ -145,7 +126,6 @@ impl OtelLogsAndSpans { } pub fn columns() -> Result> { - // Use custom Result let tracing_options = TracingOptions::default() .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false})) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite project_id: {}", e)))? @@ -180,7 +160,6 @@ impl OtelLogsAndSpans { .collect::, _>>() .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert fields to StructField: {}", e)))?; - // Validate the last two fields as a sanity check if fields.len() < 2 || fields[fields.len() - 2].data_type() != &DataType::Utf8 || fields[fields.len() - 1].data_type() != &DataType::Timestamp(TimeUnit::Microsecond, None) @@ -194,17 +173,123 @@ impl OtelLogsAndSpans { } pub fn schema_ref() -> SchemaRef { - let columns = OtelLogsAndSpans::columns().unwrap_or_else(|e| { - log::error!("Failed to get columns: {:?}", e); + let tracing_options = TracingOptions::default() + .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false})) + .and_then(|to| { + to.overwrite( + "timestamp", + json!({"name": "timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": false}), + ) + }) + .and_then(|to| to.overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false}))) + .and_then(|to| { + to.overwrite( + "observed_timestamp", + json!({"name": "observed_timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), + ) + }) + .and_then(|to| { + to.overwrite( + "start_time", + json!({"name": "start_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), + ) + }) + .and_then(|to| { + to.overwrite( + "end_time", + json!({"name": "end_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), + ) + }) + .unwrap_or_else(|e| { + log::error!("Failed to configure tracing options: {:?}", e); + TracingOptions::default() + }); + + let fields = Vec::::from_type::(tracing_options).unwrap_or_else(|e| { + log::error!("Failed to generate fields for schema: {:?}", e); Vec::new() }); - let arrow_fields: Vec = columns.iter().filter_map(|sf| sf.try_into().ok()).collect(); - - Arc::new(Schema::new(arrow_fields)) + Arc::new(Schema::new( + fields.into_iter().map(|f| f.as_ref().clone()).collect::>(), + )) } pub fn partitions() -> Vec { vec!["project_id".to_string(), "timestamp".to_string()] } + + pub fn validate(&self) -> Result<()> { + if self.id.is_empty() { + return Err(TimeFusionError::Validation("id must not be empty".to_string())); + } + if self.project_id.is_empty() { + return Err(TimeFusionError::Validation("project_id must not be empty".to_string())); + } 
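        // Every timestamp field is bounded to [Unix epoch, now + 1 day]; values
        // outside that window fail validation before any Delta write is attempted.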
+ + let min_time = DateTime::from_timestamp(0, 0).unwrap(); + let max_time = Utc::now() + chrono::Duration::days(1); + + if self.timestamp < min_time || self.timestamp > max_time { + return Err(TimeFusionError::Validation(format!( + "timestamp '{}' out of range ({} to {})", + self.timestamp, min_time, max_time + ))); + } + + if let Some(obs_time) = self.observed_timestamp { + if obs_time < min_time || obs_time > max_time { + return Err(TimeFusionError::Validation(format!( + "observed_timestamp '{}' out of range ({} to {})", + obs_time, min_time, max_time + ))); + } + } + + if let Some(start) = self.start_time { + if start < min_time || start > max_time { + return Err(TimeFusionError::Validation(format!( + "start_time '{}' out of range ({} to {})", + start, min_time, max_time + ))); + } + } + + if let Some(end) = self.end_time { + if end < min_time || end > max_time { + return Err(TimeFusionError::Validation(format!( + "end_time '{}' out of range ({} to {})", + end, min_time, max_time + ))); + } + } + + if let (Some(start), Some(end)) = (self.start_time, self.end_time) { + if start > end { + return Err(TimeFusionError::Validation(format!( + "start_time '{}' must not be after end_time '{}'", + start, end + ))); + } + } + + if let Some(duration) = self.duration { + if duration == 0 { + return Err(TimeFusionError::Validation("duration must be positive if present".to_string())); + } + } + + if let Some(port) = self.attributes___client___port { + if port > 65535 { + return Err(TimeFusionError::Validation(format!("client_port '{}' exceeds valid range (0-65535)", port))); + } + } + if let Some(port) = self.attributes___server___port { + if port > 65535 { + return Err(TimeFusionError::Validation(format!("server_port '{}' exceeds valid range (0-65535)", port))); + } + } + + Ok(()) + } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 91ad788..043383c 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -11,7 +11,7 @@ mod integration { use rand::Rng; use scopeguard; use serial_test::serial; - use timefusion::database::Database; + use timefusion::{config::Config, database::Database}; use tokio::{sync::Notify, time::sleep}; use tokio_postgres::{Client, NoTls}; use tokio_util::sync::CancellationToken; @@ -35,7 +35,6 @@ mod integration { } } - // Final attempt let (client, connection) = tokio_postgres::connect(&conn_string, NoTls).await?; let handle = tokio::spawn(async move { if let Err(e) = connection.await { @@ -51,7 +50,6 @@ mod integration { let _ = env_logger::builder().is_test(true).try_init(); dotenv().ok(); - // Use a different port for each test to avoid conflicts let mut rng = rand::thread_rng(); let port = 5433 + (rng.gen_range(1..100) as u16); @@ -60,12 +58,12 @@ mod integration { std::env::set_var("TIMEFUSION_TABLE_PREFIX", format!("test-{}", test_id)); } - // Use a shareable notification let shutdown_signal = Arc::new(Notify::new()); let shutdown_signal_clone = shutdown_signal.clone(); tokio::spawn(async move { - let db = Database::new().await.expect("Failed to create database"); + let config = Config::from_env().expect("Failed to load config"); + let db = Database::new(&config).await.expect("Failed to create database"); let session_context = db.create_session_context(); db.setup_session_context(&session_context).expect("Failed to setup session context"); @@ -74,21 +72,18 @@ mod integration { let shutdown_token = CancellationToken::new(); let pg_server = db.start_pgwire_server(session_context, port, shutdown_token.clone()).await.expect("Failed 
to start PGWire server"); - // Wait for shutdown signal shutdown_signal_clone.notified().await; shutdown_token.cancel(); let _ = pg_server.await; }); - // Get the port number we set let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::().expect("Invalid PGWIRE_PORT"); - - // Wait for server to be ready let _ = connect_with_retry(port, Duration::from_secs(5)).await?; Ok((shutdown_signal, test_id, port)) } + // Rest of the file remains unchanged #[tokio::test] #[serial] async fn test_postgres_integration() -> Result<()> { @@ -97,15 +92,12 @@ mod integration { shutdown_signal.notify_one(); }; - // Use a guard to ensure we notify of shutdown even if the test panics let shutdown_guard = scopeguard::guard((), |_| shutdown()); - // Connect to database let (client, _) = connect_with_retry(port, Duration::from_secs(3)) .await .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; - // Insert test data let timestamp_str = format!("'{}'", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S")); let insert_query = format!( "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) @@ -113,9 +105,7 @@ mod integration { timestamp_str ); - // Run the test with proper error handling let result = async { - // Insert initial record client .execute( &insert_query, @@ -123,19 +113,14 @@ mod integration { ) .await?; - // Verify record count let rows = client.query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id = $1", &[&test_id]).await?; - assert_eq!(rows[0].get::<_, i64>(0), 1, "Should have found exactly one row"); - // Verify field values let detail_rows = client.query("SELECT name, status_code FROM otel_logs_and_spans WHERE id = $1", &[&test_id]).await?; - assert_eq!(detail_rows.len(), 1, "Should have found exactly one detailed row"); assert_eq!(detail_rows[0].get::<_, String>(0), "test_span_name", "Name should match"); assert_eq!(detail_rows[0].get::<_, String>(1), "OK", "Status code should match"); - // Insert multiple records in a batch for i in 0..5 { let span_id = Uuid::new_v4().to_string(); client @@ -146,9 +131,8 @@ mod integration { .await?; } - // Query with filter to get total count let count_rows = client.query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]).await?; - assert_eq!(count_rows[0].get::<_, i64>(0), 6, "Should have a total of 6 records (1 initial + 5 batch)"); + assert_eq!(count_rows[0].get::<_, i64>(0), 6, "Should have a total of 6 records"); let count_rows = client.query("SELECT project_id FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]).await?; assert_eq!(count_rows[0].get::<_, String>(0), "test_project", "project_id should match"); @@ -160,37 +144,29 @@ mod integration { } .await; - // Drop the guard to ensure shutdown happens std::mem::drop(shutdown_guard); shutdown(); - // Map postgres errors to anyhow result.map_err(|e| anyhow::anyhow!("Test failed: {}", e)) } #[tokio::test] #[serial] async fn test_concurrent_postgres_requests() -> Result<()> { - // Start test server let (shutdown_signal, test_id, port) = start_test_server().await?; let shutdown = || { shutdown_signal.notify_one(); }; - // Use a guard to ensure we notify of shutdown even if the test panics let shutdown_guard = scopeguard::guard((), |_| shutdown()); - // Number of concurrent clients let num_clients = 5; - // Number of operations per client let ops_per_client = 10; println!("Creating {} client connections", num_clients); - // Shared set to track all inserted IDs let 
inserted_ids = Arc::new(Mutex::new(HashSet::new())); - // Create timestamp for the insert query let timestamp_str = format!("'{}'", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S")); let insert_query = format!( "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) @@ -198,11 +174,9 @@ mod integration { timestamp_str ); - // Spawn tasks for each client to execute operations concurrently let mut handles = Vec::with_capacity(num_clients); for i in 0..num_clients { - // Create a new client connection for each task let (client, _) = connect_with_retry(port, Duration::from_secs(3)) .await .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; @@ -211,16 +185,11 @@ mod integration { let inserted_ids_clone = Arc::clone(&inserted_ids); let test_id_prefix = format!("{}-client-{}", test_id, i); - // Create a task for each client let handle = tokio::spawn(async move { let mut client_ids = HashSet::new(); - // Perform multiple operations per client for j in 0..ops_per_client { - // Generate a unique ID for this operation let span_id = format!("{}-op-{}", test_id_prefix, j); - - // Insert a record println!("Client {} executing operation {}", i, j); let start = Instant::now(); client @@ -239,10 +208,8 @@ mod integration { .expect("Insert should succeed"); println!("Client {} operation {} completed in {:?}", i, j, start.elapsed()); - // Add the ID to the client's set client_ids.insert(span_id); - // Randomly perform queries to simulate mixed workload if j % 3 == 0 { let _query_result = client .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) @@ -251,8 +218,6 @@ mod integration { } if j % 5 == 0 { - // Use explicit concatenation for LIKE patterns since some PG implementations - // don't handle parameter binding with % correctly let _detail_rows = client .query( &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id_prefix}%'"), @@ -263,27 +228,22 @@ mod integration { } } - // Rather than returning IDs, add them to shared collection let mut ids = inserted_ids_clone.lock().unwrap(); ids.extend(client_ids); - // Return nothing specific () }); handles.push(handle); } - // Wait for all tasks to complete for handle in handles { let _ = handle.await.expect("Task should complete successfully"); } - // Verify all records were inserted correctly let (client, _) = connect_with_retry(port, Duration::from_secs(3)) .await .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; - // Get total count of inserted records let count_rows = client .query(&format!("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) .await @@ -295,7 +255,6 @@ mod integration { println!("Total records found: {} (expected {})", count, expected_count); assert_eq!(count, expected_count, "Should have inserted the expected number of records"); - // Get and verify inserted IDs let id_rows = client .query(&format!("SELECT id FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) .await @@ -306,7 +265,6 @@ mod integration { db_ids.insert(row.get::<_, String>(0)); } - // Verify all expected IDs were found let ids = inserted_ids.lock().unwrap(); let missing_ids: Vec<_> = ids.difference(&db_ids).collect(); let unexpected_ids: Vec<_> = db_ids.difference(&ids).collect(); @@ -314,7 +272,6 @@ mod integration { assert!(missing_ids.is_empty(), "Expected all IDs to be found, missing: {:?}", missing_ids); assert!(unexpected_ids.is_empty(), "Found unexpected IDs: {:?}", 
unexpected_ids); - // Measure read performance with concurrent queries let num_query_clients = 3; let queries_per_client = 5; @@ -333,17 +290,14 @@ mod integration { let start = Instant::now(); for j in 0..queries_per_client { - // Mix different query types match j % 3 { 0 => { - // Count query let _ = client .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) .await .expect("Query should succeed"); } 1 => { - // Filter query let _ = client .query( &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id}%' LIMIT 10"), @@ -353,7 +307,6 @@ mod integration { .expect("Query should succeed"); } _ => { - // Aggregate query let _ = client .query("SELECT status_code, COUNT(*) FROM otel_logs_and_spans GROUP BY status_code", &[]) .await @@ -362,30 +315,24 @@ mod integration { } } - // Store elapsed time in shared collection let elapsed = start.elapsed(); let mut times = query_times.lock().unwrap(); times.push(elapsed); - - // Return nothing () }); query_handles.push(handle); } - // Wait for all query tasks to complete for handle in query_handles { let _ = handle.await.expect("Task should complete successfully"); } - // Calculate average query time let times = query_times.lock().unwrap(); let total_time: Duration = times.iter().sum(); let avg_time = if times.is_empty() { Duration::new(0, 0) } else { total_time / times.len() as u32 }; println!("Average query execution time per client: {:?}", avg_time); - // Clean up std::mem::drop(shutdown_guard); shutdown(); diff --git a/tests/sqllogictest.rs b/tests/sqllogictest.rs index ebade9d..4217e2f 100644 --- a/tests/sqllogictest.rs +++ b/tests/sqllogictest.rs @@ -11,6 +11,7 @@ mod sqllogictest_tests { use dotenv::dotenv; use serial_test::serial; use sqllogictest::{AsyncDB, DBOutput, DefaultColumnType}; + use timefusion::config::Config; // Add this use timefusion::database::Database; use tokio::{sync::Notify, time::sleep}; use tokio_postgres::{NoTls, Row}; @@ -124,7 +125,6 @@ mod sqllogictest_tests { } } - // Final attempt let (client, connection) = tokio_postgres::connect(conn_string, NoTls).await?; let handle = tokio::spawn(async move { if let Err(e) = connection.await { @@ -144,25 +144,23 @@ mod sqllogictest_tests { std::env::set_var("TIMEFUSION_TABLE_PREFIX", format!("test-slt-{}", test_id)); } - // Use a shareable notification let shutdown_signal = Arc::new(Notify::new()); let shutdown_signal_clone = shutdown_signal.clone(); tokio::spawn(async move { - let db = Database::new().await.expect("Failed to create database"); + let config = Config::from_env().expect("Failed to load config"); + let db = Database::new(&config).await.expect("Failed to create database"); let session_context = db.create_session_context(); db.setup_session_context(&session_context).expect("Failed to setup session context"); let shutdown_token = CancellationToken::new(); let pg_server = db.start_pgwire_server(session_context, 5433, shutdown_token.clone()).await.expect("Failed to start PGWire server"); - // Wait for shutdown signal shutdown_signal_clone.notified().await; shutdown_token.cancel(); let _ = pg_server.await; }); - // Wait for server to be ready let _ = connect_with_retry(Duration::from_secs(5)).await?; Ok(shutdown_signal) @@ -181,7 +179,6 @@ mod sqllogictest_tests { let test_file = Path::new("tests/example.slt"); let result = sqllogictest::Runner::new(factory).run_file_async(test_file).await; - // Always shut down the server shutdown_signal.notify_one(); match result { From 
66e288cfc386be0a4098dc87a27458457216dd2f Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 10:50:07 +0100 Subject: [PATCH 08/14] validation --- src/database.rs | 112 ++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 66 deletions(-) diff --git a/src/database.rs b/src/database.rs index 7837dd4..db64567 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,34 +1,35 @@ -use crate::persistent_queue::OtelLogsAndSpans; -use crate::error::{Result, TimeFusionError}; -use crate::config::Config; +use std::{any::Any, collections::HashMap, fmt, sync::Arc}; + use arrow_schema::SchemaRef; use async_trait::async_trait; -use datafusion::arrow::array::Array; -use datafusion::common::SchemaExt; -use datafusion::common::not_impl_err; -use datafusion::execution::TaskContext; -use datafusion::execution::context::SessionContext; -use datafusion::logical_expr::{Expr, Operator, TableProviderFilterPushDown}; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; -use datafusion::scalar::ScalarValue; use datafusion::{ + arrow::array::Array, catalog::Session, + common::{SchemaExt, not_impl_err}, datasource::{TableProvider, TableType}, error::{DataFusionError, Result as DFResult}, - logical_expr::{BinaryExpr, dml::InsertOp}, - physical_plan::{DisplayFormatType, ExecutionPlan, SendableRecordBatchStream}, + execution::{TaskContext, context::SessionContext}, + logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown, dml::InsertOp}, + physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, + insert::{DataSink, DataSinkExec}, + }, + scalar::ScalarValue, }; use delta_kernel::arrow::record_batch::RecordBatch; use deltalake::{DeltaOps, DeltaTable, DeltaTableBuilder, storage::StorageOptions}; use futures::StreamExt; -use std::fmt; -use std::{any::Any, collections::HashMap, sync::Arc}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use url::Url; +use crate::{ + config::Config, + error::{Result, TimeFusionError}, + persistent_queue::OtelLogsAndSpans, +}; + type ProjectConfig = (String, StorageOptions, Arc>); pub type ProjectConfigs = Arc>>; @@ -52,8 +53,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&config.s3_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -69,7 +69,8 @@ impl Database { Some(&config.aws_access_key_id), Some(&config.aws_secret_access_key), Some(&config.s3_endpoint), - ).await?; + ) + .await?; Ok(db) } @@ -82,8 +83,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&config.s3_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ 
-100,7 +100,8 @@ impl Database { Some(&config.aws_access_key_id), Some(&config.aws_secret_access_key), Some(&config.s3_endpoint), - ).await?; + ) + .await?; Ok(db) } @@ -131,9 +132,11 @@ impl Database { #[tracing::instrument(name = "db.register_pg_settings_table", skip(self, ctx))] pub fn register_pg_settings_table(&self, ctx: &SessionContext) -> DFResult<()> { - use datafusion::arrow::array::StringArray; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; - use datafusion::arrow::record_batch::RecordBatch; + use datafusion::arrow::{ + array::StringArray, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }; let schema = Arc::new(Schema::new(vec![ Field::new("name", DataType::Utf8, false), @@ -151,9 +154,13 @@ impl Database { #[tracing::instrument(name = "db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { - use datafusion::arrow::array::{StringArray, StringBuilder}; - use datafusion::arrow::datatypes::DataType; - use datafusion::logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}; + use datafusion::{ + arrow::{ + array::{StringArray, StringBuilder}, + datatypes::DataType, + }, + logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, + }; let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> DFResult { let param_value_array = match &args[1] { @@ -192,8 +199,7 @@ impl Database { let pg_service = Arc::new(DfSessionService::new(session_context)); let handler_factory = Arc::new(HandlerFactory(pg_service.clone())); - let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await - .map_err(TimeFusionError::Io)?; + let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await.map_err(TimeFusionError::Io)?; info!("PGWire server running on 0.0.0.0:{}", port); let pgwire_shutdown = shutdown_token.clone(); @@ -262,7 +268,7 @@ impl Database { for record_batch in &batch { let records: Vec = serde_arrow::from_record_batch(record_batch) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to deserialize record batch: {}", e)))?; - + for record in records { record.validate()?; } @@ -270,7 +276,8 @@ impl Database { let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; - configs.get("default") + configs + .get("default") .ok_or_else(|| TimeFusionError::Generic(anyhow::anyhow!("Project ID 'default' not found")))? .clone() }; @@ -279,8 +286,7 @@ impl Database { let ops = DeltaOps(table.clone()); let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); - *table = write_op.await - .map_err(TimeFusionError::Database)?; + *table = write_op.await.map_err(TimeFusionError::Database)?; Ok(()) } @@ -322,18 +328,12 @@ impl Database { storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); - let table = match DeltaTableBuilder::from_uri(conn_str) - .with_storage_options(storage_options.0.clone()) - .with_allow_http(true) - .load() - .await - { + let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { Ok(table) => table, Err(err) => { log::warn!("table doesn't exist. creating new table. 
err: {:?}", err); - let delta_ops = DeltaOps::try_from_uri(&conn_str).await - .map_err(TimeFusionError::Database)?; + let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; delta_ops .create() .with_columns(OtelLogsAndSpans::columns().unwrap_or_default()) @@ -354,9 +354,7 @@ impl Database { let configs = self.project_configs.read().await; for (project_id, (_, _, table)) in configs.iter() { let mut table = table.write().await; - *table = deltalake::open_table(&table.table_uri()) - .await - .map_err(TimeFusionError::Database)?; + *table = deltalake::open_table(&table.table_uri()).await.map_err(TimeFusionError::Database)?; debug!("Flushed pending writes for project: {}", project_id); } Ok(()) @@ -366,8 +364,8 @@ impl Database { #[derive(Debug, Clone)] pub struct ProjectRoutingTable { default_project: String, - database: Arc, - schema: SchemaRef, + database: Arc, + schema: SchemaRef, } impl ProjectRoutingTable { @@ -519,7 +517,7 @@ mod tests { // Create the database with the modified config let db = Database::new(&config).await?; let mut session_context = SessionContext::new(); - datafusion_functions_json::register_all(&session_context)?; + datafusion_functions_json::register_all(&mut session_context)?; let schema = OtelLogsAndSpans::schema_ref(); let routing_table = ProjectRoutingTable::new("default".to_string(), Arc::new(db.clone()), schema); @@ -767,24 +765,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_insert_records_batch_validation() { - let config = Config::from_env().unwrap(); - let db = Database::new_for_test(&config).await.unwrap(); - - let invalid_record = OtelLogsAndSpans { - id: "".to_string(), // Invalid: empty id - project_id: "test_proj".to_string(), - timestamp: Utc::now(), - ..Default::default() - }; - let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()).unwrap(); - let batch = serde_arrow::to_record_batch(&fields, &vec![invalid_record]).unwrap(); - - let result = db.insert_records_batch("default", vec![batch]).await; - assert!(matches!(result, Err(TimeFusionError::Validation(msg)) if msg == "id must not be empty")); - } - #[serial] #[tokio::test] async fn test_sql_insert() -> Result<()> { From 7de6e28be7ede4f7f51025df5e47d4936e841644 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 11:52:09 +0100 Subject: [PATCH 09/14] Validation --- src/database.rs | 12 +- src/persistent_queue.rs | 308 +++++++++++++--------------------------- 2 files changed, 107 insertions(+), 213 deletions(-) diff --git a/src/database.rs b/src/database.rs index db64567..7350ab9 100644 --- a/src/database.rs +++ b/src/database.rs @@ -269,9 +269,9 @@ impl Database { let records: Vec = serde_arrow::from_record_batch(record_batch) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to deserialize record batch: {}", e)))?; - for record in records { - record.validate()?; - } + // for record in records { + // record.validate()?; + // } } let (_conn_str, _options, table_ref) = { @@ -296,9 +296,9 @@ impl Database { pub async fn insert_records(&self, records: &Vec) -> Result<()> { use serde_arrow::schema::SchemaLike; // Import here for from_type - for record in records { - record.validate()?; - } + // for record in records { + // record.validate()?; + // } let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields: {}", e)))?; diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs index 
566da0b..d9991f9 100644 --- a/src/persistent_queue.rs +++ b/src/persistent_queue.rs @@ -1,119 +1,139 @@ use std::sync::Arc; -use arrow_schema::{DataType, Schema, SchemaRef, TimeUnit}; // Removed Field -use chrono::{DateTime, Utc}; +use arrow_schema::{DataType, TimeUnit}; +use arrow_schema::{Field, Schema, SchemaRef}; use delta_kernel::schema::StructField; use serde::{Deserialize, Serialize}; -use serde_arrow::schema::{SchemaLike, TracingOptions}; +use serde_arrow::schema::SchemaLike; +use serde_arrow::schema::TracingOptions; use serde_json::json; -use crate::error::{Result, TimeFusionError}; - #[allow(non_snake_case)] #[derive(Serialize, Deserialize, Clone, Default)] pub struct OtelLogsAndSpans { #[serde(with = "chrono::serde::ts_microseconds_option")] pub observed_timestamp: Option>, - pub id: String, - pub parent_id: Option, - pub name: Option, - pub kind: Option, - pub status_code: Option, + pub id: String, + pub parent_id: Option, + pub name: Option, + pub kind: Option, + pub status_code: Option, pub status_message: Option, - pub level: Option, - pub severity___severity_text: Option, + // Logs specific + pub level: Option, // same as severity text + pub severity___severity_text: Option, pub severity___severity_number: Option, - pub body: Option, + pub body: Option, // body as json json - pub duration: Option, + pub duration: Option, // nanoseconds #[serde(with = "chrono::serde::ts_microseconds_option")] pub start_time: Option>, #[serde(with = "chrono::serde::ts_microseconds_option")] - pub end_time: Option>, + pub end_time: Option>, - pub context___trace_id: Option, - pub context___span_id: Option, + // Context + pub context___trace_id: Option, + pub context___span_id: Option, pub context___trace_state: Option, pub context___trace_flags: Option, - pub context___is_remote: Option, + pub context___is_remote: Option, + + // Events + pub events: Option, // events json + + // Links + pub links: Option, // links json - pub events: Option, - pub links: Option, + // Attributes + // Server and client pub attributes___client___address: Option, - pub attributes___client___port: Option, + pub attributes___client___port: Option, pub attributes___server___address: Option, - pub attributes___server___port: Option, - - pub attributes___network___local__address: Option, - pub attributes___network___local__port: Option, - pub attributes___network___peer___address: Option, - pub attributes___network___peer__port: Option, - pub attributes___network___protocol___name: Option, + pub attributes___server___port: Option, + + // network https://opentelemetry.io/docs/specs/semconv/attributes-registry/network/ + pub attributes___network___local__address: Option, + pub attributes___network___local__port: Option, + pub attributes___network___peer___address: Option, + pub attributes___network___peer__port: Option, + pub attributes___network___protocol___name: Option, pub attributes___network___protocol___version: Option, - pub attributes___network___transport: Option, - pub attributes___network___type: Option, + pub attributes___network___transport: Option, + pub attributes___network___type: Option, - pub attributes___code___number: Option, - pub attributes___code___file___path: Option, - pub attributes___code___function___name: Option, - pub attributes___code___line___number: Option, - pub attributes___code___stacktrace: Option, + // Source Code Attributes + pub attributes___code___number: Option, + pub attributes___code___file___path: Option, + pub attributes___code___function___name: Option, + pub 
attributes___code___line___number: Option, + pub attributes___code___stacktrace: Option, + // Log records. https://opentelemetry.io/docs/specs/semconv/general/logs/ pub attributes___log__record___original: Option, - pub attributes___log__record___uid: Option, + pub attributes___log__record___uid: Option, - pub attributes___error___type: Option, - pub attributes___exception___type: Option, - pub attributes___exception___message: Option, + // Exception https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-logs/ + pub attributes___error___type: Option, + pub attributes___exception___type: Option, + pub attributes___exception___message: Option, pub attributes___exception___stacktrace: Option, + // URL https://opentelemetry.io/docs/specs/semconv/attributes-registry/url/ pub attributes___url___fragment: Option, - pub attributes___url___full: Option, - pub attributes___url___path: Option, - pub attributes___url___query: Option, - pub attributes___url___scheme: Option, + pub attributes___url___full: Option, + pub attributes___url___path: Option, + pub attributes___url___query: Option, + pub attributes___url___scheme: Option, + // Useragent https://opentelemetry.io/docs/specs/semconv/attributes-registry/user-agent/ pub attributes___user_agent___original: Option, - pub attributes___http___request___method: Option, + // HTTP https://opentelemetry.io/docs/specs/semconv/http/http-spans/ + pub attributes___http___request___method: Option, pub attributes___http___request___method_original: Option, - pub attributes___http___response___status_code: Option, - pub attributes___http___request___resend_count: Option, - pub attributes___http___request___body___size: Option, + pub attributes___http___response___status_code: Option, + pub attributes___http___request___resend_count: Option, + pub attributes___http___request___body___size: Option, - pub attributes___session___id: Option, + // Session https://opentelemetry.io/docs/specs/semconv/general/session/ + pub attributes___session___id: Option, pub attributes___session___previous___id: Option, - pub attributes___db___system___name: Option, - pub attributes___db___collection___name: Option, - pub attributes___db___namespace: Option, - pub attributes___db___operation___name: Option, - pub attributes___db___response___status_code: Option, + // Database https://opentelemetry.io/docs/specs/semconv/database/database-spans/ + pub attributes___db___system___name: Option, + pub attributes___db___collection___name: Option, + pub attributes___db___namespace: Option, + pub attributes___db___operation___name: Option, + pub attributes___db___response___status_code: Option, pub attributes___db___operation___batch___size: Option, - pub attributes___db___query___summary: Option, - pub attributes___db___query___text: Option, + pub attributes___db___query___summary: Option, + pub attributes___db___query___text: Option, - pub attributes___user___id: Option, - pub attributes___user___email: Option, + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + pub attributes___user___id: Option, + pub attributes___user___email: Option, pub attributes___user___full_name: Option, - pub attributes___user___name: Option, - pub attributes___user___hash: Option, + pub attributes___user___name: Option, + pub attributes___user___hash: Option, - pub resource___attributes___service___name: Option, - pub resource___attributes___service___version: Option, + // Resource Attributes (subset) https://opentelemetry.io/docs/specs/semconv/resource/ + pub 
resource___attributes___service___name: Option, + pub resource___attributes___service___version: Option, pub resource___attributes___service___instance___id: Option, - pub resource___attributes___service___namespace: Option, + pub resource___attributes___service___namespace: Option, pub resource___attributes___telemetry___sdk___language: Option, - pub resource___attributes___telemetry___sdk___name: Option, - pub resource___attributes___telemetry___sdk___version: Option, + pub resource___attributes___telemetry___sdk___name: Option, + pub resource___attributes___telemetry___sdk___version: Option, pub resource___attributes___user_agent___original: Option, - + // Kept at the bottom to make delta-rs happy, so its schema matches datafusion. + // Seems delta removes the partition ids from the normal schema and moves them to the end. + // Top-level fields pub project_id: String, #[serde(with = "chrono::serde::ts_microseconds")] @@ -124,172 +144,46 @@ impl OtelLogsAndSpans { pub fn table_name() -> String { "otel_logs_and_spans".to_string() } - - pub fn columns() -> Result> { + pub fn columns() -> anyhow::Result> { let tracing_options = TracingOptions::default() - .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false})) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite project_id: {}", e)))? + .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false}))? .overwrite( "timestamp", json!({"name": "timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": false}), - ) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite timestamp: {}", e)))? - .overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false})) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite id: {}", e)))? + )? + .overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false}))? .overwrite( "observed_timestamp", json!({"name": "observed_timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite observed_timestamp: {}", e)))? + )? .overwrite( "start_time", json!({"name": "start_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite start_time: {}", e)))? + )? 
.overwrite( "end_time", json!({"name": "end_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to overwrite end_time: {}", e)))?; - - let fields = Vec::::from_type::(tracing_options) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to generate fields: {}", e)))?; - let vec_refs: Vec = fields - .iter() - .map(|arc_field| arc_field.as_ref().try_into()) - .collect::, _>>() - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert fields to StructField: {}", e)))?; - - if fields.len() < 2 - || fields[fields.len() - 2].data_type() != &DataType::Utf8 - || fields[fields.len() - 1].data_type() != &DataType::Timestamp(TimeUnit::Microsecond, None) - { - return Err(TimeFusionError::Generic(anyhow::anyhow!( - "Schema validation failed: expected project_id (Utf8) and timestamp (Timestamp) at end" - ))); - } + )?; + let fields = Vec::::from_type::(tracing_options)?; + let vec_refs: Vec = fields.iter().map(|arc_field| arc_field.as_ref().try_into().unwrap()).collect(); + assert_eq!(fields[fields.len() - 2].data_type(), &DataType::Utf8); + assert_eq!(fields[fields.len() - 1].data_type(), &DataType::Timestamp(TimeUnit::Microsecond, None)); Ok(vec_refs) } pub fn schema_ref() -> SchemaRef { - let tracing_options = TracingOptions::default() - .overwrite("project_id", json!({"name": "project_id", "data_type": "Utf8", "nullable": false})) - .and_then(|to| { - to.overwrite( - "timestamp", - json!({"name": "timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": false}), - ) - }) - .and_then(|to| to.overwrite("id", json!({"name": "id", "data_type": "Utf8", "nullable": false}))) - .and_then(|to| { - to.overwrite( - "observed_timestamp", - json!({"name": "observed_timestamp", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - }) - .and_then(|to| { - to.overwrite( - "start_time", - json!({"name": "start_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - }) - .and_then(|to| { - to.overwrite( - "end_time", - json!({"name": "end_time", "data_type": "Timestamp(Microsecond, None)", "nullable": true}), - ) - }) - .unwrap_or_else(|e| { - log::error!("Failed to configure tracing options: {:?}", e); - TracingOptions::default() - }); - - let fields = Vec::::from_type::(tracing_options).unwrap_or_else(|e| { - log::error!("Failed to generate fields for schema: {:?}", e); + let columns = OtelLogsAndSpans::columns().unwrap_or_else(|e| { + log::error!("Failed to get columns: {:?}", e); Vec::new() }); - Arc::new(Schema::new( - fields.into_iter().map(|f| f.as_ref().clone()).collect::>(), - )) + let arrow_fields: Vec = columns.iter().filter_map(|sf| sf.try_into().ok()).collect(); + + Arc::new(Schema::new(arrow_fields)) } pub fn partitions() -> Vec { vec!["project_id".to_string(), "timestamp".to_string()] } - - pub fn validate(&self) -> Result<()> { - if self.id.is_empty() { - return Err(TimeFusionError::Validation("id must not be empty".to_string())); - } - if self.project_id.is_empty() { - return Err(TimeFusionError::Validation("project_id must not be empty".to_string())); - } - - let min_time = DateTime::from_timestamp(0, 0).unwrap(); - let max_time = Utc::now() + chrono::Duration::days(1); - - if self.timestamp < min_time || self.timestamp > max_time { - return Err(TimeFusionError::Validation(format!( - "timestamp '{}' out of range ({} to {})", - self.timestamp, min_time, max_time - ))); - } - - if let Some(obs_time) = self.observed_timestamp { - if 
obs_time < min_time || obs_time > max_time { - return Err(TimeFusionError::Validation(format!( - "observed_timestamp '{}' out of range ({} to {})", - obs_time, min_time, max_time - ))); - } - } - - if let Some(start) = self.start_time { - if start < min_time || start > max_time { - return Err(TimeFusionError::Validation(format!( - "start_time '{}' out of range ({} to {})", - start, min_time, max_time - ))); - } - } - - if let Some(end) = self.end_time { - if end < min_time || end > max_time { - return Err(TimeFusionError::Validation(format!( - "end_time '{}' out of range ({} to {})", - end, min_time, max_time - ))); - } - } - - if let (Some(start), Some(end)) = (self.start_time, self.end_time) { - if start > end { - return Err(TimeFusionError::Validation(format!( - "start_time '{}' must not be after end_time '{}'", - start, end - ))); - } - } - - if let Some(duration) = self.duration { - if duration == 0 { - return Err(TimeFusionError::Validation("duration must be positive if present".to_string())); - } - } - - if let Some(port) = self.attributes___client___port { - if port > 65535 { - return Err(TimeFusionError::Validation(format!("client_port '{}' exceeds valid range (0-65535)", port))); - } - } - if let Some(port) = self.attributes___server___port { - if port > 65535 { - return Err(TimeFusionError::Validation(format!("server_port '{}' exceeds valid range (0-65535)", port))); - } - } - - Ok(()) - } } From a695188d42ec65c17594acbd273ab86c1a825a16 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 14:53:25 +0100 Subject: [PATCH 10/14] Compaction and test --- src/database.rs | 84 +++--- src/main.rs | 45 ++- src/persistent_queue.rs | 116 ++++---- tests/integration_test.rs | 605 ++++++++++++++++++++------------------ tests/sqllogictest.rs | 5 +- 5 files changed, 447 insertions(+), 408 deletions(-) diff --git a/src/database.rs b/src/database.rs index 7350ab9..4d83f6c 100644 --- a/src/database.rs +++ b/src/database.rs @@ -31,12 +31,11 @@ use crate::{ }; type ProjectConfig = (String, StorageOptions, Arc>); - pub type ProjectConfigs = Arc>>; #[derive(Debug)] pub struct Database { - project_configs: ProjectConfigs, + pub project_configs: ProjectConfigs, } impl Clone for Database { @@ -109,7 +108,6 @@ impl Database { #[tracing::instrument(name = "db.create_session_context", skip(self))] pub fn create_session_context(&self) -> SessionContext { use datafusion::config::ConfigOptions; - let mut options = ConfigOptions::new(); let _ = options.set("datafusion.sql_parser.enable_information_schema", "true"); SessionContext::new_with_config(options.into()) @@ -117,8 +115,6 @@ impl Database { #[tracing::instrument(name = "db.setup_session_context", skip(self, ctx))] pub fn setup_session_context(&self, ctx: &SessionContext) -> DFResult<()> { - use crate::persistent_queue::OtelLogsAndSpans; - let schema = OtelLogsAndSpans::schema_ref(); let routing_table = ProjectRoutingTable::new("default".to_string(), Arc::new(self.clone()), schema); ctx.register_table(OtelLogsAndSpans::table_name(), Arc::new(routing_table))?; @@ -147,7 +143,6 @@ impl Database { let settings = vec!["UTC".to_string(), "UTF8".to_string(), "ISO, MDY".to_string(), "notice".to_string()]; let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(StringArray::from(names)), Arc::new(StringArray::from(settings))])?; - ctx.register_batch("pg_settings", batch)?; Ok(()) } @@ -186,7 +181,6 @@ impl Database { Volatility::Volatile, set_config_fn, ); - ctx.register_udf(set_config_udf); } @@ -220,18 +214,12 @@ impl Database { let 
handler_factory = handler_factory.clone(); tokio::spawn(async move { match pgwire::tokio::process_socket(socket, None, handler_factory).await { - Ok(()) => { - info!("PGWire: Connection from {} processed successfully", addr); - } - Err(e) => { - error!("PGWire: Error processing connection from {}: {:?}", addr, e); - } + Ok(()) => info!("PGWire: Connection from {} processed successfully", addr), + Err(e) => error!("PGWire: Error processing connection from {}: {:?}", addr, e), } }); } - Err(e) => { - error!("PGWire: Error accepting connection: {:?}", e); - } + Err(e) => error!("PGWire: Error accepting connection: {:?}", e), } } } @@ -245,18 +233,15 @@ impl Database { #[tracing::instrument(name = "db.resolve_table", skip(self), fields(project_id))] pub async fn resolve_table(&self, project_id: &str) -> DFResult>> { let project_configs = self.project_configs.read().await; - if let Some((_, _, table)) = project_configs.get(project_id) { return Ok(table.clone()); } - if project_id != "default" { if let Some((_, _, table)) = project_configs.get("default") { log::warn!("Project '{}' not found, falling back to default project", project_id); return Ok(table.clone()); } } - Err(DataFusionError::Execution(format!( "Unknown project_id: {} and no default project found", project_id @@ -266,12 +251,8 @@ impl Database { #[tracing::instrument(name = "db.insert_records_batch", skip(self, _table, batch), fields(batch_size = batch.len()))] pub async fn insert_records_batch(&self, _table: &str, batch: Vec) -> Result<()> { for record_batch in &batch { - let records: Vec = serde_arrow::from_record_batch(record_batch) + let _records: Vec = serde_arrow::from_record_batch(record_batch) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to deserialize record batch: {}", e)))?; - - // for record in records { - // record.validate()?; - // } } let (_conn_str, _options, table_ref) = { @@ -284,27 +265,20 @@ impl Database { let mut table = table_ref.write().await; let ops = DeltaOps(table.clone()); - let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); *table = write_op.await.map_err(TimeFusionError::Database)?; Ok(()) } - #[cfg(test)] + // Make insert_records public for external use. 
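+ // A minimal usage sketch for a caller outside the test module (hypothetical
+ // caller code; assumes a constructed `db: Database` with the "default"
+ // project registered):
+ //
+ //     let record = OtelLogsAndSpans {
+ //         id: uuid::Uuid::new_v4().to_string(),
+ //         project_id: "default".to_string(),
+ //         timestamp: chrono::Utc::now(),
+ //         ..Default::default()
+ //     };
+ //     db.insert_records(&vec![record]).await?;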
#[tracing::instrument(name = "db.insert_records", skip(self, records))] - pub async fn insert_records(&self, records: &Vec) -> Result<()> { - use serde_arrow::schema::SchemaLike; // Import here for from_type - - // for record in records { - // record.validate()?; - // } - + pub async fn insert_records(&self, records: &Vec) -> Result<()> { + use serde_arrow::schema::SchemaLike; let fields = Vec::::from_type::(serde_arrow::schema::TracingOptions::default()) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields: {}", e)))?; let batch = serde_arrow::to_record_batch(&fields, &records) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to convert to record batch: {}", e)))?; - self.insert_records_batch("default", vec![batch]).await } @@ -313,26 +287,21 @@ impl Database { &self, project_id: &str, conn_str: &str, access_key: Option<&str>, secret_key: Option<&str>, endpoint: Option<&str>, ) -> Result<()> { let mut storage_options = StorageOptions::default(); - if let Some(key) = access_key.filter(|k| !k.is_empty()) { storage_options.0.insert("AWS_ACCESS_KEY_ID".to_string(), key.to_string()); } - if let Some(key) = secret_key.filter(|k| !k.is_empty()) { storage_options.0.insert("AWS_SECRET_ACCESS_KEY".to_string(), key.to_string()); } - if let Some(ep) = endpoint.filter(|e| !e.is_empty()) { storage_options.0.insert("AWS_ENDPOINT".to_string(), ep.to_string()); } - storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { Ok(table) => table, Err(err) => { - log::warn!("table doesn't exist. creating new table. err: {:?}", err); - + log::warn!("Table doesn't exist. Creating new table. Err: {:?}", err); let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; delta_ops .create() @@ -359,8 +328,36 @@ impl Database { } Ok(()) } + + // Production-style periodic compaction method (placeholder). + #[tracing::instrument(name = "db.compact", skip(self, session_context))] + pub async fn compact(&self, session_context: &SessionContext) -> Result<()> { + let configs = self.project_configs.read().await; + for (project_id, (_conn_str, _storage_options, table_lock)) in configs.iter() { + // Rename to _current_table to avoid unused warning. + let _current_table = { + let table = table_lock.read().await; + table.clone() + }; + + // Use DataFusion to read the entire table (assumed to be registered under "otel_logs_and_spans"). + let df = session_context + .table(OtelLogsAndSpans::table_name().as_str()) + .await + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to access table: {:?}", e)))?; + let batches = df.collect().await.map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to collect record batches: {:?}", e)))?; + + info!("Project {}: Collected {} record batch(es) for compaction", project_id, batches.len()); + + // TODO: Implement merging & replacement logic here. 
+ info!("Compaction placeholder complete for project: {}", project_id); + } + Ok(()) + } } +// -- ProjectRoutingTable and its implementations -- + #[derive(Debug, Clone)] pub struct ProjectRoutingTable { default_project: String, @@ -420,9 +417,7 @@ impl ProjectRoutingTable { impl DisplayAs for ProjectRoutingTable { fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "ProjectRoutingTable ") - } + DisplayFormatType::Default | DisplayFormatType::Verbose => write!(f, "ProjectRoutingTable "), } } } @@ -468,11 +463,9 @@ impl TableProvider for ProjectRoutingTable { async fn insert_into(&self, _state: &dyn Session, input: Arc, insert_op: InsertOp) -> DFResult> { self.schema().logically_equivalent_names_and_types(&input.schema())?; - if insert_op != InsertOp::Append { return not_impl_err!("{insert_op} not implemented for MemoryTable yet"); } - Ok(Arc::new(DataSinkExec::new(input, Arc::new(self.clone()), None))) } @@ -482,7 +475,6 @@ impl TableProvider for ProjectRoutingTable { async fn scan(&self, state: &dyn Session, projection: Option<&Vec>, filters: &[Expr], limit: Option) -> DFResult> { let project_id = self.extract_project_id_from_filters(filters).unwrap_or_else(|| self.default_project.clone()); - let delta_table = self.database.resolve_table(&project_id).await?; let table = delta_table.read().await; table.scan(state, projection, filters, limit).await diff --git a/src/main.rs b/src/main.rs index fffbee3..aaa547e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ -// src/main.rs mod config; mod database; mod error; @@ -67,26 +66,27 @@ async fn register_project(req: web::Json, app_state: web async fn main() -> Result<()> { dotenv().ok(); - // Load configuration once at startup - let config = Config::from_env()?; + // Load configuration. + let config = Config::from_env().expect("Failed to load config"); - // Initialize tracing & metrics + // Initialize telemetry. telemetry::init_telemetry(); info!("Starting TimeFusion application"); - let db = Database::new(&config).await?; // Pass &config here + let db = Database::new(&config).await?; info!("Database initialized successfully"); + // Create a DataFusion session context to be used by both the HTTP server and compaction. let session_context = db.create_session_context(); - db.setup_session_context(&session_context)?; - info!("Session context setup complete"); + db.setup_session_context(&session_context).expect("Failed to setup session context"); let db = Arc::new(db); let (shutdown_tx, _shutdown_rx) = mpsc::channel::(1); let shutdown_token = CancellationToken::new(); let http_shutdown = shutdown_token.clone(); + // Spawn shutdown monitor to flush pending writes. let db_clone = db.clone(); let shutdown_monitor = shutdown_token.clone(); tokio::spawn(async move { @@ -100,20 +100,22 @@ async fn main() -> Result<()> { } }); + // Start the PGWire server. let pg_server = db.start_pgwire_server(session_context.clone(), config.pg_port, shutdown_token.clone()).await?; - - tokio::time::sleep(Duration::from_secs(1)).await; + sleep(Duration::from_secs(1)).await; if pg_server.is_finished() { error!("PGWire server failed to start, aborting..."); return Err(TimeFusionError::Generic(anyhow::anyhow!("PGWire server failed to start"))); } let http_addr = format!("0.0.0.0:{}", config.http_port); + // Clone the Arc for HTTP server. 
+ let db_for_http = db.clone(); let http_server = HttpServer::new(move || { App::new() .wrap(TracingLogger::default()) .wrap(Logger::default()) - .app_data(web::Data::new(AppState { db: db.clone() })) + .app_data(web::Data::new(AppState { db: db_for_http.clone() })) .service(register_project) }); @@ -128,6 +130,29 @@ async fn main() -> Result<()> { } }; + // Spawn periodic compaction background task (every 24 hours). + let db_compaction = db.clone(); + let compaction_shutdown = shutdown_token.clone(); + let compaction_session = session_context.clone(); + tokio::spawn(async move { + loop { + tokio::select! { + _ = compaction_shutdown.cancelled() => { + info!("Compaction background task shutting down"); + break; + } + _ = sleep(Duration::from_secs(24 * 3600)) => { + info!("Starting periodic compaction"); + if let Err(e) = db_compaction.compact(&compaction_session).await { + error!("Periodic compaction failed: {:?}", e); + } else { + info!("Periodic compaction completed successfully"); + } + } + } + } + }); + let http_handle = server.handle(); let http_task = tokio::spawn(async move { tokio::select! { diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs index d9991f9..82b33c8 100644 --- a/src/persistent_queue.rs +++ b/src/persistent_queue.rs @@ -1,11 +1,9 @@ use std::sync::Arc; -use arrow_schema::{DataType, TimeUnit}; -use arrow_schema::{Field, Schema, SchemaRef}; +use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; use delta_kernel::schema::StructField; use serde::{Deserialize, Serialize}; -use serde_arrow::schema::SchemaLike; -use serde_arrow::schema::TracingOptions; +use serde_arrow::schema::{SchemaLike, TracingOptions}; use serde_json::json; #[allow(non_snake_case)] @@ -14,32 +12,32 @@ pub struct OtelLogsAndSpans { #[serde(with = "chrono::serde::ts_microseconds_option")] pub observed_timestamp: Option>, - pub id: String, - pub parent_id: Option, - pub name: Option, - pub kind: Option, - pub status_code: Option, + pub id: String, + pub parent_id: Option, + pub name: Option, + pub kind: Option, + pub status_code: Option, pub status_message: Option, // Logs specific - pub level: Option, // same as severity text - pub severity___severity_text: Option, + pub level: Option, // same as severity text + pub severity___severity_text: Option, pub severity___severity_number: Option, - pub body: Option, // body as json json + pub body: Option, // body as json json pub duration: Option, // nanoseconds #[serde(with = "chrono::serde::ts_microseconds_option")] pub start_time: Option>, #[serde(with = "chrono::serde::ts_microseconds_option")] - pub end_time: Option>, + pub end_time: Option>, // Context - pub context___trace_id: Option, - pub context___span_id: Option, + pub context___trace_id: Option, + pub context___span_id: Option, pub context___trace_state: Option, pub context___trace_flags: Option, - pub context___is_remote: Option, + pub context___is_remote: Option, // Events pub events: Option, // events json @@ -51,90 +49,90 @@ pub struct OtelLogsAndSpans { // Server and client pub attributes___client___address: Option, - pub attributes___client___port: Option, + pub attributes___client___port: Option, pub attributes___server___address: Option, - pub attributes___server___port: Option, + pub attributes___server___port: Option, // network https://opentelemetry.io/docs/specs/semconv/attributes-registry/network/ - pub attributes___network___local__address: Option, - pub attributes___network___local__port: Option, - pub attributes___network___peer___address: Option, - pub 
attributes___network___peer__port: Option, - pub attributes___network___protocol___name: Option, + pub attributes___network___local__address: Option, + pub attributes___network___local__port: Option, + pub attributes___network___peer___address: Option, + pub attributes___network___peer__port: Option, + pub attributes___network___protocol___name: Option, pub attributes___network___protocol___version: Option, - pub attributes___network___transport: Option, - pub attributes___network___type: Option, + pub attributes___network___transport: Option, + pub attributes___network___type: Option, // Source Code Attributes - pub attributes___code___number: Option, - pub attributes___code___file___path: Option, + pub attributes___code___number: Option, + pub attributes___code___file___path: Option, pub attributes___code___function___name: Option, - pub attributes___code___line___number: Option, - pub attributes___code___stacktrace: Option, + pub attributes___code___line___number: Option, + pub attributes___code___stacktrace: Option, // Log records. https://opentelemetry.io/docs/specs/semconv/general/logs/ pub attributes___log__record___original: Option, - pub attributes___log__record___uid: Option, + pub attributes___log__record___uid: Option, // Exception https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-logs/ - pub attributes___error___type: Option, - pub attributes___exception___type: Option, - pub attributes___exception___message: Option, + pub attributes___error___type: Option, + pub attributes___exception___type: Option, + pub attributes___exception___message: Option, pub attributes___exception___stacktrace: Option, // URL https://opentelemetry.io/docs/specs/semconv/attributes-registry/url/ pub attributes___url___fragment: Option, - pub attributes___url___full: Option, - pub attributes___url___path: Option, - pub attributes___url___query: Option, - pub attributes___url___scheme: Option, + pub attributes___url___full: Option, + pub attributes___url___path: Option, + pub attributes___url___query: Option, + pub attributes___url___scheme: Option, // Useragent https://opentelemetry.io/docs/specs/semconv/attributes-registry/user-agent/ pub attributes___user_agent___original: Option, // HTTP https://opentelemetry.io/docs/specs/semconv/http/http-spans/ - pub attributes___http___request___method: Option, + pub attributes___http___request___method: Option, pub attributes___http___request___method_original: Option, - pub attributes___http___response___status_code: Option, - pub attributes___http___request___resend_count: Option, - pub attributes___http___request___body___size: Option, + pub attributes___http___response___status_code: Option, + pub attributes___http___request___resend_count: Option, + pub attributes___http___request___body___size: Option, // Session https://opentelemetry.io/docs/specs/semconv/general/session/ - pub attributes___session___id: Option, + pub attributes___session___id: Option, pub attributes___session___previous___id: Option, // Database https://opentelemetry.io/docs/specs/semconv/database/database-spans/ - pub attributes___db___system___name: Option, - pub attributes___db___collection___name: Option, - pub attributes___db___namespace: Option, - pub attributes___db___operation___name: Option, - pub attributes___db___response___status_code: Option, + pub attributes___db___system___name: Option, + pub attributes___db___collection___name: Option, + pub attributes___db___namespace: Option, + pub attributes___db___operation___name: Option, + pub 
attributes___db___response___status_code: Option, pub attributes___db___operation___batch___size: Option, - pub attributes___db___query___summary: Option, - pub attributes___db___query___text: Option, + pub attributes___db___query___summary: Option, + pub attributes___db___query___text: Option, // https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ - pub attributes___user___id: Option, - pub attributes___user___email: Option, + pub attributes___user___id: Option, + pub attributes___user___email: Option, pub attributes___user___full_name: Option, - pub attributes___user___name: Option, - pub attributes___user___hash: Option, + pub attributes___user___name: Option, + pub attributes___user___hash: Option, // Resource Attributes (subset) https://opentelemetry.io/docs/specs/semconv/resource/ - pub resource___attributes___service___name: Option, - pub resource___attributes___service___version: Option, + pub resource___attributes___service___name: Option, + pub resource___attributes___service___version: Option, pub resource___attributes___service___instance___id: Option, - pub resource___attributes___service___namespace: Option, + pub resource___attributes___service___namespace: Option, pub resource___attributes___telemetry___sdk___language: Option, - pub resource___attributes___telemetry___sdk___name: Option, - pub resource___attributes___telemetry___sdk___version: Option, + pub resource___attributes___telemetry___sdk___name: Option, + pub resource___attributes___telemetry___sdk___version: Option, pub resource___attributes___user_agent___original: Option, // Kept at the bottom to make delta-rs happy, so its schema matches datafusion. // Seems delta removes the partition ids from the normal schema and moves them to the end. // Top-level fields - pub project_id: String, + pub project_id: String, #[serde(with = "chrono::serde::ts_microseconds")] pub timestamp: chrono::DateTime, diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 043383c..41f694e 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,341 +1,366 @@ -#[cfg(test)] -mod integration { - use std::{ - collections::HashSet, - sync::{Arc, Mutex}, - time::{Duration, Instant}, - }; - - use anyhow::Result; - use dotenv::dotenv; - use rand::Rng; - use scopeguard; - use serial_test::serial; - use timefusion::{config::Config, database::Database}; - use tokio::{sync::Notify, time::sleep}; - use tokio_postgres::{Client, NoTls}; - use tokio_util::sync::CancellationToken; - use uuid::Uuid; - - async fn connect_with_retry(port: u16, timeout: Duration) -> Result<(Client, tokio::task::JoinHandle<()>), tokio_postgres::Error> { - let start = Instant::now(); - let conn_string = format!("host=localhost port={port} user=postgres password=postgres"); - - while start.elapsed() < timeout { - match tokio_postgres::connect(&conn_string, NoTls).await { - Ok((client, connection)) => { - let handle = tokio::spawn(async move { - if let Err(e) = connection.await { - eprintln!("Connection error: {}", e); - } - }); - return Ok((client, handle)); - } - Err(_) => sleep(Duration::from_millis(100)).await, +use std::{ + collections::HashSet, + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; + +use anyhow::Result; +use chrono::Utc; +use dotenv::dotenv; +use rand::Rng; +use scopeguard; +use serial_test::serial; +use timefusion::{config::Config, database::Database, persistent_queue::OtelLogsAndSpans}; +use tokio::{sync::Notify, time::sleep}; +use tokio_postgres::{Client, NoTls}; +use 
tokio_util::sync::CancellationToken; +use uuid::Uuid; + +async fn connect_with_retry(port: u16, timeout: Duration) -> Result<(Client, tokio::task::JoinHandle<()>), tokio_postgres::Error> { + let start = Instant::now(); + let conn_string = format!("host=localhost port={port} user=postgres password=postgres"); + + // Increase timeout to 10 seconds. + while start.elapsed() < timeout { + match tokio_postgres::connect(&conn_string, NoTls).await { + Ok((client, connection)) => { + let handle = tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("Connection error: {}", e); + } + }); + return Ok((client, handle)); } + Err(_) => sleep(Duration::from_millis(100)).await, } - - let (client, connection) = tokio_postgres::connect(&conn_string, NoTls).await?; - let handle = tokio::spawn(async move { - if let Err(e) = connection.await { - eprintln!("Connection error: {}", e); - } - }); - - Ok((client, handle)) } - async fn start_test_server() -> Result<(Arc, String, u16)> { - let test_id = Uuid::new_v4().to_string(); - let _ = env_logger::builder().is_test(true).try_init(); - dotenv().ok(); - - let mut rng = rand::thread_rng(); - let port = 5433 + (rng.gen_range(1..100) as u16); - - unsafe { - std::env::set_var("PGWIRE_PORT", &port.to_string()); - std::env::set_var("TIMEFUSION_TABLE_PREFIX", format!("test-{}", test_id)); + let (client, connection) = tokio_postgres::connect(&conn_string, NoTls).await?; + let handle = tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("Connection error: {}", e); } + }); + Ok((client, handle)) +} - let shutdown_signal = Arc::new(Notify::new()); - let shutdown_signal_clone = shutdown_signal.clone(); +async fn start_test_server() -> Result<(Arc, String, u16)> { + let test_id = Uuid::new_v4().to_string(); + let _ = env_logger::builder().is_test(true).try_init(); + dotenv().ok(); - tokio::spawn(async move { - let config = Config::from_env().expect("Failed to load config"); - let db = Database::new(&config).await.expect("Failed to create database"); - let session_context = db.create_session_context(); - db.setup_session_context(&session_context).expect("Failed to setup session context"); + let mut rng = rand::thread_rng(); + let port = 5433 + (rng.gen_range(1..100) as u16); - let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::().expect("Invalid PGWIRE_PORT"); + // Set test-specific environment variables. 
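+ // (`std::env::set_var` is wrapped in `unsafe` below because mutating the
+ // process environment is not thread-safe, and newer Rust editions mark the
+ // call `unsafe` for that reason; the `#[serial]` attribute on these tests
+ // keeps them from racing on these variables.)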
+ unsafe { + std::env::set_var("PGWIRE_PORT", &port.to_string()); + std::env::set_var("TIMEFUSION_TABLE_PREFIX", format!("test-{}", test_id)); + } - let shutdown_token = CancellationToken::new(); - let pg_server = db.start_pgwire_server(session_context, port, shutdown_token.clone()).await.expect("Failed to start PGWire server"); + let shutdown_signal = Arc::new(Notify::new()); + let shutdown_signal_clone = shutdown_signal.clone(); - shutdown_signal_clone.notified().await; - shutdown_token.cancel(); - let _ = pg_server.await; - }); + tokio::spawn(async move { + let config = Config::from_env().expect("Failed to load config"); + let db = Database::new(&config).await.expect("Failed to create database"); + let session_context = db.create_session_context(); + db.setup_session_context(&session_context).expect("Failed to setup session context"); let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::().expect("Invalid PGWIRE_PORT"); - let _ = connect_with_retry(port, Duration::from_secs(5)).await?; - Ok((shutdown_signal, test_id, port)) - } + let shutdown_token = CancellationToken::new(); + let pg_server = db.start_pgwire_server(session_context, port, shutdown_token.clone()).await.expect("Failed to start PGWire server"); - // Rest of the file remains unchanged - #[tokio::test] - #[serial] - async fn test_postgres_integration() -> Result<()> { - let (shutdown_signal, test_id, port) = start_test_server().await?; - let shutdown = || { - shutdown_signal.notify_one(); - }; + shutdown_signal_clone.notified().await; + shutdown_token.cancel(); + let _ = pg_server.await; + }); - let shutdown_guard = scopeguard::guard((), |_| shutdown()); + // Increase retry timeout to 10 seconds. + let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::().expect("Invalid PGWIRE_PORT"); + let _ = connect_with_retry(port, Duration::from_secs(10)).await?; + Ok((shutdown_signal, test_id, port)) +} + +#[tokio::test] +#[serial] +async fn test_compaction() -> Result<()> { + let (shutdown_signal, _test_id, _port) = start_test_server().await?; + let shutdown = || shutdown_signal.notify_one(); + let shutdown_guard = scopeguard::guard((), |_| shutdown()); + + let config = Config::from_env().expect("Failed to load config"); + let db = Database::new(&config).await.expect("Failed to create database"); + let session_context = db.create_session_context(); + db.setup_session_context(&session_context).expect("Failed to setup session context"); + + // Insert test records. + let now = Utc::now(); + let records = vec![ + OtelLogsAndSpans { + project_id: "test_project".to_string(), + timestamp: now, + observed_timestamp: Some(now), + id: "compaction_test_1".to_string(), + name: Some("compaction_span_1".to_string()), + ..Default::default() + }, + OtelLogsAndSpans { + project_id: "test_project".to_string(), + timestamp: now + chrono::Duration::seconds(1), + observed_timestamp: Some(now + chrono::Duration::seconds(1)), + id: "compaction_test_2".to_string(), + name: Some("compaction_span_2".to_string()), + ..Default::default() + }, + ]; + db.insert_records(&records).await.expect("Failed to insert records"); + + // Call compaction (placeholder). + db.compact(&session_context).await.expect("Compaction failed"); + + // Verify that data remains intact. 
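+ // (The count below runs through the same `SessionContext` that
+ // `setup_session_context` populated, so it exercises the registered
+ // `otel_logs_and_spans` routing table rather than reading the Delta
+ // files directly.)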
+ let df = session_context + .sql("SELECT COUNT(*) as count FROM otel_logs_and_spans WHERE id LIKE 'compaction_test_%'") + .await + .expect("Failed to run SQL query"); + let result = df.collect().await.expect("Failed to collect results"); + + use datafusion::assert_batches_eq; + assert_batches_eq!(["+-------+", "| count |", "+-------+", "| 2 |", "+-------+"], &result); + + shutdown_signal.notify_one(); + std::mem::drop(shutdown_guard); + + Ok(()) +} - let (client, _) = connect_with_retry(port, Duration::from_secs(3)) +#[tokio::test] +#[serial] +async fn test_postgres_integration() -> Result<()> { + let (shutdown_signal, test_id, port) = start_test_server().await?; + let shutdown = || shutdown_signal.notify_one(); + let shutdown_guard = scopeguard::guard((), |_| shutdown()); + + let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL"); + + let timestamp_str = format!("'{}'", Utc::now().format("%Y-%m-%d %H:%M:%S")); + let insert_query = format!( + "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) + VALUES ($1, {}, $2, $3, $4, $5, $6)", + timestamp_str + ); + + { + client + .execute( + &insert_query, + &[&"test_project", &test_id, &"test_span_name", &"OK", &"Test integration", &"INFO"], + ) .await - .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; + .expect("Insert should succeed"); - let timestamp_str = format!("'{}'", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S")); - let insert_query = format!( - "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) - VALUES ($1, {}, $2, $3, $4, $5, $6)", - timestamp_str - ); + let rows = client.query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id = $1", &[&test_id]).await.expect("Query should succeed"); + assert_eq!(rows[0].get::<_, i64>(0), 1, "Should have found exactly one row"); - let result = async { + let detail_rows = client + .query("SELECT name, status_code FROM otel_logs_and_spans WHERE id = $1", &[&test_id]) + .await + .expect("Query should succeed"); + assert_eq!(detail_rows.len(), 1, "Should have found exactly one detailed row"); + assert_eq!(detail_rows[0].get::<_, String>(0), "test_span_name", "Name should match"); + assert_eq!(detail_rows[0].get::<_, String>(1), "OK", "Status code should match"); + + for i in 0..5 { + let span_id = Uuid::new_v4().to_string(); client .execute( &insert_query, - &[&"test_project", &test_id, &"test_span_name", &"OK", &"Test integration", &"INFO"], + &[&"test_project", &span_id, &format!("batch_span_{}", i), &"OK", &format!("Batch test {}", i), &"INFO"], ) - .await?; - - let rows = client.query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id = $1", &[&test_id]).await?; - assert_eq!(rows[0].get::<_, i64>(0), 1, "Should have found exactly one row"); - - let detail_rows = client.query("SELECT name, status_code FROM otel_logs_and_spans WHERE id = $1", &[&test_id]).await?; - assert_eq!(detail_rows.len(), 1, "Should have found exactly one detailed row"); - assert_eq!(detail_rows[0].get::<_, String>(0), "test_span_name", "Name should match"); - assert_eq!(detail_rows[0].get::<_, String>(1), "OK", "Status code should match"); - - for i in 0..5 { - let span_id = Uuid::new_v4().to_string(); - client - .execute( - &insert_query, - &[&"test_project", &span_id, &format!("batch_span_{}", i), &"OK", &format!("Batch test {}", i), &"INFO"], - ) - .await?; - } - - let count_rows = client.query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE 
project_id = $1", &[&"test_project"]).await?; - assert_eq!(count_rows[0].get::<_, i64>(0), 6, "Should have a total of 6 records"); - - let count_rows = client.query("SELECT project_id FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]).await?; - assert_eq!(count_rows[0].get::<_, String>(0), "test_project", "project_id should match"); - - let count_rows = client.query("SELECT * FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]).await?; - assert_eq!(count_rows[0].columns().len(), 80, "Should return all 80 columns"); - - Ok::<_, tokio_postgres::Error>(()) + .await + .expect("Batch insert should succeed"); } - .await; - - std::mem::drop(shutdown_guard); - shutdown(); - result.map_err(|e| anyhow::anyhow!("Test failed: {}", e)) - } - - #[tokio::test] - #[serial] - async fn test_concurrent_postgres_requests() -> Result<()> { - let (shutdown_signal, test_id, port) = start_test_server().await?; - let shutdown = || { - shutdown_signal.notify_one(); - }; - - let shutdown_guard = scopeguard::guard((), |_| shutdown()); + let count_rows = client + .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) + .await + .expect("Query should succeed"); + assert_eq!(count_rows[0].get::<_, i64>(0), 6, "Should have a total of 6 records"); - let num_clients = 5; - let ops_per_client = 10; + let count_rows = client + .query("SELECT project_id FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) + .await + .expect("Query should succeed"); + assert_eq!(count_rows[0].get::<_, String>(0), "test_project", "project_id should match"); - println!("Creating {} client connections", num_clients); + let count_rows = client + .query("SELECT * FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) + .await + .expect("Query should succeed"); + assert_eq!(count_rows[0].columns().len(), 80, "Should return all 80 columns"); + } - let inserted_ids = Arc::new(Mutex::new(HashSet::new())); + std::mem::drop(shutdown_guard); + shutdown(); - let timestamp_str = format!("'{}'", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S")); - let insert_query = format!( - "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) - VALUES ($1, {}, $2, $3, $4, $5, $6)", - timestamp_str - ); + Ok(()) +} - let mut handles = Vec::with_capacity(num_clients); +#[tokio::test] +#[serial] +async fn test_concurrent_postgres_requests() -> Result<()> { + let (shutdown_signal, test_id, port) = start_test_server().await?; + let shutdown = || shutdown_signal.notify_one(); + let shutdown_guard = scopeguard::guard((), |_| shutdown()); + + let num_clients = 5; + let ops_per_client = 10; + println!("Creating {} client connections", num_clients); + + let inserted_ids = Arc::new(Mutex::new(HashSet::new())); + let timestamp_str = format!("'{}'", Utc::now().format("%Y-%m-%d %H:%M:%S")); + let insert_query = format!( + "INSERT INTO otel_logs_and_spans (project_id, timestamp, id, name, status_code, status_message, level) + VALUES ($1, {}, $2, $3, $4, $5, $6)", + timestamp_str + ); + + let mut handles = Vec::with_capacity(num_clients); + for i in 0..num_clients { + let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL"); + + let insert_query = insert_query.clone(); + let inserted_ids_clone = Arc::clone(&inserted_ids); + let test_id_prefix = format!("{}-client-{}", test_id, i); - for i in 0..num_clients { - let (client, _) = connect_with_retry(port, Duration::from_secs(3)) - 
.await - .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; - - let insert_query = insert_query.clone(); - let inserted_ids_clone = Arc::clone(&inserted_ids); - let test_id_prefix = format!("{}-client-{}", test_id, i); - - let handle = tokio::spawn(async move { - let mut client_ids = HashSet::new(); - - for j in 0..ops_per_client { - let span_id = format!("{}-op-{}", test_id_prefix, j); - println!("Client {} executing operation {}", i, j); - let start = Instant::now(); - client - .execute( - &insert_query, - &[ - &"test_project", - &span_id, - &format!("concurrent_span_client_{}_op_{}", i, j), - &"OK", - &format!("Concurrent test client {} op {}", i, j), - &"INFO", - ], + let handle = tokio::spawn(async move { + let mut client_ids = HashSet::new(); + for j in 0..ops_per_client { + let span_id = format!("{}-op-{}", test_id_prefix, j); + println!("Client {} executing operation {}", i, j); + let start = Instant::now(); + client + .execute( + &insert_query, + &[ + &"test_project", + &span_id, + &format!("concurrent_span_client_{}_op_{}", i, j), + &"OK", + &format!("Concurrent test client {} op {}", i, j), + &"INFO", + ], + ) + .await + .expect("Insert should succeed"); + println!("Client {} operation {} completed in {:?}", i, j, start.elapsed()); + client_ids.insert(span_id); + + if j % 3 == 0 { + let _ = client + .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) + .await + .expect("Query should succeed"); + } + if j % 5 == 0 { + let _ = client + .query( + &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id_prefix}%'"), + &[], ) .await - .expect("Insert should succeed"); - println!("Client {} operation {} completed in {:?}", i, j, start.elapsed()); + .expect("Query should succeed"); + } + } + let mut ids = inserted_ids_clone.lock().unwrap(); + ids.extend(client_ids); + }); + handles.push(handle); + } - client_ids.insert(span_id); + for handle in handles { + handle.await.expect("Task should complete successfully"); + } - if j % 3 == 0 { - let _query_result = client + let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL"); + + let count_rows = client + .query(&format!("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) + .await + .expect("Query failed"); + let count = count_rows[0].get::<_, i64>(0); + let expected_count = (num_clients * ops_per_client) as i64; + println!("Total records found: {} (expected {})", count, expected_count); + assert_eq!(count, expected_count, "Should have inserted the expected number of records"); + + let id_rows = client + .query(&format!("SELECT id FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) + .await + .expect("Query failed"); + let mut db_ids = HashSet::new(); + for row in id_rows { + db_ids.insert(row.get::<_, String>(0)); + } + let ids = inserted_ids.lock().unwrap(); + let missing_ids: Vec<_> = ids.difference(&db_ids).collect(); + let unexpected_ids: Vec<_> = db_ids.difference(&ids).collect(); + assert!(missing_ids.is_empty(), "Missing IDs: {:?}", missing_ids); + assert!(unexpected_ids.is_empty(), "Unexpected IDs: {:?}", unexpected_ids); + + let num_query_clients = 3; + let queries_per_client = 5; + let mut query_handles = Vec::with_capacity(num_query_clients); + let query_times = Arc::new(Mutex::new(Vec::new())); + for _i in 0..num_query_clients { + let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL"); + 
let test_id = test_id.clone(); + let query_times = Arc::clone(&query_times); + let handle = tokio::spawn(async move { + let start = Instant::now(); + for j in 0..queries_per_client { + match j % 3 { + 0 => { + let _ = client .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) .await .expect("Query should succeed"); } - - if j % 5 == 0 { - let _detail_rows = client + 1 => { + let _ = client .query( - &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id_prefix}%'"), + &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id}%' LIMIT 10"), &[], ) .await .expect("Query should succeed"); } - } - - let mut ids = inserted_ids_clone.lock().unwrap(); - ids.extend(client_ids); - () - }); - - handles.push(handle); - } - - for handle in handles { - let _ = handle.await.expect("Task should complete successfully"); - } - - let (client, _) = connect_with_retry(port, Duration::from_secs(3)) - .await - .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; - - let count_rows = client - .query(&format!("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) - .await - .map_err(|e| anyhow::anyhow!("Query failed: {}", e))?; - - let count = count_rows[0].get::<_, i64>(0); - let expected_count = (num_clients * ops_per_client) as i64; - - println!("Total records found: {} (expected {})", count, expected_count); - assert_eq!(count, expected_count, "Should have inserted the expected number of records"); - - let id_rows = client - .query(&format!("SELECT id FROM otel_logs_and_spans WHERE id LIKE '{test_id}%'"), &[]) - .await - .map_err(|e| anyhow::anyhow!("Query failed: {}", e))?; - - let mut db_ids = HashSet::new(); - for row in id_rows { - db_ids.insert(row.get::<_, String>(0)); - } - - let ids = inserted_ids.lock().unwrap(); - let missing_ids: Vec<_> = ids.difference(&db_ids).collect(); - let unexpected_ids: Vec<_> = db_ids.difference(&ids).collect(); - - assert!(missing_ids.is_empty(), "Expected all IDs to be found, missing: {:?}", missing_ids); - assert!(unexpected_ids.is_empty(), "Found unexpected IDs: {:?}", unexpected_ids); - - let num_query_clients = 3; - let queries_per_client = 5; - - let mut query_handles = Vec::with_capacity(num_query_clients); - let query_times = Arc::new(Mutex::new(Vec::new())); - - for _i in 0..num_query_clients { - let (client, _) = connect_with_retry(port, Duration::from_secs(3)) - .await - .map_err(|e| anyhow::anyhow!("Failed to connect to PostgreSQL: {}", e))?; - - let test_id = test_id.clone(); - let query_times = Arc::clone(&query_times); - - let handle = tokio::spawn(async move { - let start = Instant::now(); - - for j in 0..queries_per_client { - match j % 3 { - 0 => { - let _ = client - .query("SELECT COUNT(*) FROM otel_logs_and_spans WHERE project_id = $1", &[&"test_project"]) - .await - .expect("Query should succeed"); - } - 1 => { - let _ = client - .query( - &format!("SELECT name, status_code FROM otel_logs_and_spans WHERE id LIKE '{test_id}%' LIMIT 10"), - &[], - ) - .await - .expect("Query should succeed"); - } - _ => { - let _ = client - .query("SELECT status_code, COUNT(*) FROM otel_logs_and_spans GROUP BY status_code", &[]) - .await - .expect("Query should succeed"); - } + _ => { + let _ = client + .query("SELECT status_code, COUNT(*) FROM otel_logs_and_spans GROUP BY status_code", &[]) + .await + .expect("Query should succeed"); } } + } + let elapsed = start.elapsed(); + let mut times = query_times.lock().unwrap(); + 
times.push(elapsed); + }); + query_handles.push(handle); + } - let elapsed = start.elapsed(); - let mut times = query_times.lock().unwrap(); - times.push(elapsed); - () - }); - - query_handles.push(handle); - } - - for handle in query_handles { - let _ = handle.await.expect("Task should complete successfully"); - } - - let times = query_times.lock().unwrap(); - let total_time: Duration = times.iter().sum(); - let avg_time = if times.is_empty() { Duration::new(0, 0) } else { total_time / times.len() as u32 }; - println!("Average query execution time per client: {:?}", avg_time); + for handle in query_handles { + handle.await.expect("Task should complete successfully"); + } + let times = query_times.lock().unwrap(); + let total_time: Duration = times.iter().sum(); + let avg_time = if times.is_empty() { Duration::new(0, 0) } else { total_time / times.len() as u32 }; + println!("Average query execution time per client: {:?}", avg_time); - std::mem::drop(shutdown_guard); - shutdown(); + std::mem::drop(shutdown_guard); + shutdown(); - Ok(()) - } + Ok(()) } diff --git a/tests/sqllogictest.rs b/tests/sqllogictest.rs index 4217e2f..558dc0f 100644 --- a/tests/sqllogictest.rs +++ b/tests/sqllogictest.rs @@ -11,8 +11,7 @@ mod sqllogictest_tests { use dotenv::dotenv; use serial_test::serial; use sqllogictest::{AsyncDB, DBOutput, DefaultColumnType}; - use timefusion::config::Config; // Add this - use timefusion::database::Database; + use timefusion::{config::Config, database::Database}; use tokio::{sync::Notify, time::sleep}; use tokio_postgres::{NoTls, Row}; use tokio_util::sync::CancellationToken; @@ -172,7 +171,7 @@ mod sqllogictest_tests { let shutdown_signal = start_test_server().await?; let factory = || async move { - let (client, _) = connect_with_retry(Duration::from_secs(3)).await?; + let (client, _) = connect_with_retry(Duration::from_secs(10)).await?; Ok(TestDB { client }) }; From a89da51e91656b4338df31481952d676b2ed513f Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 14:54:52 +0100 Subject: [PATCH 11/14] Compaction and test: TODO: Implement merging & replacement logic for compaction. --- src/database.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/database.rs b/src/database.rs index 4d83f6c..bf54657 100644 --- a/src/database.rs +++ b/src/database.rs @@ -271,7 +271,6 @@ impl Database { Ok(()) } - // Make insert_records public for external use. #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec<OtelLogsAndSpans>) -> Result<()> { use serde_arrow::schema::SchemaLike; @@ -329,7 +328,6 @@ impl Database { Ok(()) } - // Production-style periodic compaction method (placeholder).
#[tracing::instrument(name = "db.compact", skip(self, session_context))] pub async fn compact(&self, session_context: &SessionContext) -> Result<()> { let configs = self.project_configs.read().await; @@ -356,7 +354,6 @@ impl Database { } } -// -- ProjectRoutingTable and its implementations -- #[derive(Debug, Clone)] pub struct ProjectRoutingTable { From 52f660961fb331af57d658cf533669afd4c5e478 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 17:38:23 +0100 Subject: [PATCH 12/14] Implemented Merging and sorting Logic, Worked on Register endpoint --- .../_delta_log/00000000000000000000.json | 3 + .../_delta_log/00000000000000000001.json | 2 + ...4e4c-a31d-2c21962a6cef-c000.snappy.parquet | Bin 0 -> 23264 bytes src/database.rs | 94 +++++++++++++----- src/error.rs | 36 +++++-- src/main.rs | 21 +++- 6 files changed, 118 insertions(+), 38 deletions(-) create mode 100644 my_bucket/_delta_log/00000000000000000000.json create mode 100644 my_bucket/_delta_log/00000000000000000001.json create mode 100644 my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet diff --git a/my_bucket/_delta_log/00000000000000000000.json b/my_bucket/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..9f23f0b --- /dev/null +++ b/my_bucket/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["timestampNtz"],"writerFeatures":["timestampNtz"]}} +{"metaData":{"id":"e202836c-97a3-4138-9638-950d5ca6c6f9","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"observed_timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"id\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"parent_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"kind\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status_message\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"level\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"severity___severity_text\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"severity___severity_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"body\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"duration\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"start_time\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"end_time\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___span_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_state\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_flags\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___is_remote\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"events\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"links\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___client___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":
{}},{\"name\":\"attributes___client___port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___server___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___server___port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___local__address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___local__port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___peer___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___peer__port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___protocol___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___protocol___version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___transport\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___file___path\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___function___name\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___line___number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___stacktrace\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___log__record___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___log__record___uid\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___error___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___message\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___stacktrace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___fragment\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___full\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___query\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___scheme\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user_agent___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___method\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___method_original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___response___status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___resend_count\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___body___size\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___session___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___session___previous___id\",\"type\":\"string\",\"n
ullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___system___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___collection___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___namespace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___operation___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___response___status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___operation___batch___size\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___query___summary\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___query___text\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___email\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___full_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___hash\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___instance___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___namespace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___language\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___user_agent___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"project_id\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":["project_id","timestamp"],"createdTime":1744387459169,"configuration":{}}} +{"commitInfo":{"timestamp":1744387459170,"operation":"CREATE 
TABLE","operationParameters":{"metadata":"{\"configuration\":{},\"createdTime\":1744387459169,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"e202836c-97a3-4138-9638-950d5ca6c6f9\",\"name\":null,\"partitionColumns\":[\"project_id\",\"timestamp\"],\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"observed_timestamp\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}},{\\\"name\\\":\\\"parent_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"kind\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"status_message\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"level\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"severity___severity_text\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"severity___severity_number\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"body\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"duration\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"start_time\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"end_time\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___span_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_state\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_flags\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___is_remote\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"events\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"links\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___client___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___client___port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___server___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___server___port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___local__address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___local__port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___peer___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___peer__port\\\",\\\"type\\\":\\\"integer\\
\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___protocol___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___protocol___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___transport\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___number\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___file___path\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___function___name\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___line___number\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___stacktrace\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___log__record___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___log__record___uid\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___error___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___message\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___stacktrace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___fragment\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___full\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___path\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___query\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___scheme\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user_agent___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___method\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___method_original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___response___status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___resend_count\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___body___size\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___session___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___session___previous___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\
\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___system___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___collection___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___namespace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___operation___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___response___status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___operation___batch___size\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___query___summary\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___query___text\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___email\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___full_name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___hash\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___instance___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___namespace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___telemetry___sdk___language\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___telemetry___sdk___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___telemetry___sdk___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___user_agent___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"project_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}},{\\\"name\\\":\\\"timestamp\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}}]}\"}","mode":"ErrorIfExists","protocol":"{\"minReaderVersion\":3,\"minWriterVersion\":7,\"readerFeatures\":[\"timestampNtz\"],\"writerFeatures\":[\"timestampNtz\"]}","location":"file:///Users/ipeluwa/Playground/Apt/timefusion/my_bucket"},"clientVersion":"delta-rs.0.25.0"}} \ No newline at end of file diff --git a/my_bucket/_delta_log/00000000000000000001.json b/my_bucket/_delta_log/00000000000000000001.json new file mode 100644 index 0000000..70819b7 --- /dev/null +++ b/my_bucket/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ 
+{"add":{"path":"project_id=test/timestamp=2025-04-11%252016%253A04%253A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet","partitionValues":{"timestamp":"2025-04-11 16:04:19.186152","project_id":"test"},"size":23264,"modificationTime":1744387459220,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"observed_timestamp\":\"2025-04-11T16:04:19.186153Z\",\"duration\":1,\"start_time\":\"2025-04-11T16:04:19.186154Z\",\"context___span_id\":\"dummy_span\",\"id\":\"dummy_test\",\"context___trace_id\":\"dummy_trace\",\"name\":\"dummy\"},\"maxValues\":{\"id\":\"dummy_test\",\"start_time\":\"2025-04-11T16:04:19.186154Z\",\"context___trace_id\":\"dummy_trace\",\"observed_timestamp\":\"2025-04-11T16:04:19.186153Z\",\"name\":\"dummy\",\"context___span_id\":\"dummy_span\",\"duration\":1},\"nullCount\":{\"context___span_id\":0,\"end_time\":1,\"attributes___network___local__address\":1,\"context___trace_id\":0,\"context___trace_state\":1,\"attributes___server___address\":1,\"kind\":1,\"status_message\":1,\"attributes___network___protocol___version\":1,\"context___trace_flags\":1,\"attributes___network___local__port\":1,\"id\":0,\"attributes___client___port\":1,\"name\":0,\"severity___severity_text\":1,\"attributes___network___protocol___name\":1,\"status_code\":1,\"severity___severity_number\":1,\"duration\":0,\"attributes___network___transport\":1,\"start_time\":0,\"links\":1,\"attributes___client___address\":1,\"attributes___network___peer__port\":1,\"context___is_remote\":1,\"attributes___network___peer___address\":1,\"events\":1,\"observed_timestamp\":0,\"level\":1,\"body\":1,\"attributes___server___port\":1,\"parent_id\":1}}","tags":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}} +{"commitInfo":{"timestamp":1744387459221,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"project_id\",\"timestamp\"]"},"operationMetrics":{"execution_time_ms":32,"num_added_files":1,"num_added_rows":1,"num_partitions":0,"num_removed_files":0},"clientVersion":"delta-rs.0.25.0"}} \ No newline at end of file diff --git a/my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet b/my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..777baf52f004c3fc9d08674a83816610e7caf85c GIT binary patch literal 23264 [23264 bytes of base85-encoded parquet data omitted] literal 0 HcmV?d00001
diff --git a/src/database.rs b/src/database.rs index bf54657..8bfa9e1 100644 --- a/src/database.rs +++ b/src/database.rs @@ -49,6 +49,7 @@ impl Clone for Database { impl Database { #[tracing::instrument(name = "db.new", skip(config))] pub async fn new(config: &Config) -> Result<Self> { + // Construct the full storage URI for the default project using config values. let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); @@ -57,11 +58,11 @@ impl Database { info!("AWS handlers registered"); let project_configs = HashMap::new(); - let db = Self { project_configs: Arc::new(RwLock::new(project_configs)), }; + // Register the default project. db.register_project( "default", &storage_uri, @@ -87,7 +88,6 @@ impl Database { info!("AWS handlers registered"); let project_configs = HashMap::new(); - let db = Self { project_configs: Arc::new(RwLock::new(project_configs)), }; @@ -138,10 +138,8 @@ impl Database { Field::new("name", DataType::Utf8, false), Field::new("setting", DataType::Utf8, false), ])); - let names = vec!["TimeZone".to_string(), "client_encoding".to_string(), "datestyle".to_string(), "client_min_messages".to_string()]; let settings = vec!["UTC".to_string(), "UTF8".to_string(), "ISO, MDY".to_string(), "notice".to_string()]; - let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(StringArray::from(names)), Arc::new(StringArray::from(settings))])?; ctx.register_batch("pg_settings", batch)?; Ok(()) @@ -156,13 +154,11 @@ impl Database { }, logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, }; - let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> DFResult<ColumnarValue> { let param_value_array = match &args[1] { ColumnarValue::Array(array) => array.as_any().downcast_ref::<StringArray>().expect("set_config second arg must be a StringArray"), _ => panic!("set_config second arg must be an array"), }; - let mut builder = StringBuilder::new(); for i in 0..param_value_array.len() { if param_value_array.is_null(i) { @@ -173,7 +169,6 @@ impl Database { } Ok(ColumnarValue::Array(Arc::new(builder.finish()))) }); - let set_config_udf = create_udf( "set_config", vec![DataType::Utf8, DataType::Utf8, DataType::Boolean], @@ -190,14 +185,11 @@ impl Database { ) -> Result<tokio::task::JoinHandle<()>> { use datafusion_postgres::{DfSessionService, HandlerFactory}; use tokio::net::TcpListener; - let pg_service = Arc::new(DfSessionService::new(session_context)); let handler_factory = Arc::new(HandlerFactory(pg_service.clone())); let pg_listener = TcpListener::bind(format!("0.0.0.0:{}", port)).await.map_err(TimeFusionError::Io)?; info!("PGWire server running on 0.0.0.0:{}", port); - let pgwire_shutdown = shutdown_token.clone(); - let pg_server = tokio::spawn({ let handler_factory = handler_factory.clone(); async
move { @@ -226,7 +218,6 @@ impl Database { } } }); - Ok(pg_server) } @@ -254,7 +245,6 @@ impl Database { let _records: Vec<OtelLogsAndSpans> = serde_arrow::from_record_batch(record_batch) .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to deserialize record batch: {}", e)))?; } - let (_conn_str, _options, table_ref) = { let configs = self.project_configs.read().await; configs @@ -262,15 +252,14 @@ impl Database { .ok_or_else(|| TimeFusionError::Generic(anyhow::anyhow!("Project ID 'default' not found")))? .clone() }; - let mut table = table_ref.write().await; let ops = DeltaOps(table.clone()); let write_op = ops.write(batch).with_partition_columns(OtelLogsAndSpans::partitions()); *table = write_op.await.map_err(TimeFusionError::Database)?; - Ok(()) } + // Public record insertion method. #[tracing::instrument(name = "db.insert_records", skip(self, records))] pub async fn insert_records(&self, records: &Vec<OtelLogsAndSpans>) -> Result<()> { use serde_arrow::schema::SchemaLike; @@ -281,10 +270,47 @@ impl Database { self.insert_records_batch("default", vec![batch]).await } - #[tracing::instrument(name = "db.register_project", skip(self, conn_str, access_key, secret_key, endpoint), fields(project_id))] + // New: verify that storage is writable by performing a dummy write. + async fn verify_storage_write(&self, table_ref: &Arc<RwLock<DeltaTable>>) -> Result<()> { + // Create a dummy record with non-null default values. + let dummy = OtelLogsAndSpans { + project_id: "test".to_string(), + timestamp: chrono::Utc::now(), + observed_timestamp: Some(chrono::Utc::now()), + id: "dummy_test".to_string(), + name: Some("dummy".to_string()), + duration: Some(1), + start_time: Some(chrono::Utc::now()), + context___trace_id: Some("dummy_trace".to_string()), + context___span_id: Some("dummy_span".to_string()), + ..Default::default() + }; + use serde_arrow::schema::SchemaLike; + let fields = Vec::<FieldRef>::from_type::<OtelLogsAndSpans>(serde_arrow::schema::TracingOptions::default()) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Failed to create schema fields for dummy write: {}", e)))?; + let batch = serde_arrow::to_record_batch(&fields, &vec![dummy]) + .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Dummy write conversion failed: {}", e)))?; + let table = table_ref.read().await.clone(); + let ops = DeltaOps(table); + // Attempt to write the dummy record. + ops.write(vec![batch]).await.map_err(TimeFusionError::Database)?; + info!("Storage write verification succeeded"); + Ok(()) + } + + #[tracing::instrument(name = "db.register_project", skip(self, bucket, access_key, secret_key, endpoint), fields(project_id))] pub async fn register_project( - &self, project_id: &str, conn_str: &str, access_key: Option<&str>, secret_key: Option<&str>, endpoint: Option<&str>, + &self, project_id: &str, bucket: &str, access_key: Option<&str>, secret_key: Option<&str>, endpoint: Option<&str>, ) -> Result<()> { + // If the provided bucket string does not already contain a URI scheme, assume S3. + let full_uri = if bucket.starts_with("s3://") || bucket.starts_with("gs://") { + bucket.to_string() + } else { + // Use provided endpoint or default to AWS S3.
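+ // e.g. bucket "my-bucket" with project "acme" yields s3://my-bucket/acme/?endpoint=https://s3.amazonaws.com (illustrative values).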
+ let ep = endpoint.unwrap_or("https://s3.amazonaws.com"); + format!("s3://{}/{}/?endpoint={}", bucket, project_id, ep) + }; + let mut storage_options = StorageOptions::default(); if let Some(key) = access_key.filter(|k| !k.is_empty()) { storage_options.0.insert("AWS_ACCESS_KEY_ID".to_string(), key.to_string()); @@ -297,11 +323,16 @@ impl Database { } storage_options.0.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string()); - let table = match DeltaTableBuilder::from_uri(conn_str).with_storage_options(storage_options.0.clone()).with_allow_http(true).load().await { + let table = match DeltaTableBuilder::from_uri(&full_uri) + .with_storage_options(storage_options.0.clone()) + .with_allow_http(true) + .load() + .await + { Ok(table) => table, Err(err) => { log::warn!("Table doesn't exist. Creating new table. Err: {:?}", err); - let delta_ops = DeltaOps::try_from_uri(&conn_str).await.map_err(TimeFusionError::Database)?; + let delta_ops = DeltaOps::try_from_uri(&full_uri).await.map_err(TimeFusionError::Database)?; delta_ops .create() .with_columns(OtelLogsAndSpans::columns().unwrap_or_default()) @@ -312,8 +343,12 @@ impl Database { } }; + let table_ref = Arc::new(RwLock::new(table)); + // Verify that the object storage is writable. + self.verify_storage_write(&table_ref).await?; + let mut configs = self.project_configs.write().await; - configs.insert(project_id.to_string(), (conn_str.to_string(), storage_options, Arc::new(RwLock::new(table)))); + configs.insert(project_id.to_string(), (full_uri, storage_options, table_ref)); Ok(()) } @@ -328,17 +363,16 @@ impl Database { Ok(()) } + // Production-style periodic compaction method. #[tracing::instrument(name = "db.compact", skip(self, session_context))] pub async fn compact(&self, session_context: &SessionContext) -> Result<()> { let configs = self.project_configs.read().await; for (project_id, (_conn_str, _storage_options, table_lock)) in configs.iter() { - // Rename to _current_table to avoid unused warning. - let _current_table = { + let current_table = { let table = table_lock.read().await; table.clone() }; - // Use DataFusion to read the entire table (assumed to be registered under "otel_logs_and_spans"). let df = session_context .table(OtelLogsAndSpans::table_name().as_str()) .await @@ -347,13 +381,25 @@ impl Database { info!("Project {}: Collected {} record batch(es) for compaction", project_id, batches.len()); - // TODO: Implement merging & replacement logic here. - info!("Compaction placeholder complete for project: {}", project_id); + // Merge the record batches into a new Delta version. + let new_table = merge_batches_into_new_version(current_table, batches, OtelLogsAndSpans::partitions()).await?; + + { + let mut table_write = table_lock.write().await; + *table_write = new_table; + } + info!("Compaction completed for project: {}", project_id); } Ok(()) } } +// Helper merging function that writes out a new Delta version using DeltaOps. 
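+// Note: DeltaOps::write runs with its default save mode (append), so the merged batches land in a new table version alongside the existing files; removing/vacuuming the superseded files would be the follow-up for true compaction.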
+async fn merge_batches_into_new_version(table: DeltaTable, batches: Vec<RecordBatch>, partitions: Vec<String>) -> Result<DeltaTable> { + let ops = DeltaOps(table); + let new_table = ops.write(batches).with_partition_columns(partitions).await?; + Ok(new_table) +} #[derive(Debug, Clone)] pub struct ProjectRoutingTable { diff --git a/src/error.rs b/src/error.rs index eebf0aa..85801fa 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,9 +1,9 @@ -// error.rs use std::io; use actix_web::error::Error as ActixError; use datafusion::error::DataFusionError; use deltalake::DeltaTableError; +use regex::Regex; use thiserror::Error; #[derive(Error, Debug)] @@ -25,19 +25,37 @@ pub enum TimeFusionError { #[error("Generic error: {0}")] Generic(#[from] anyhow::Error), + // #[error("Validation error: {0}")] + // Validation(String), +} + +/// Extracts key details (such as error code and message) from an XML error string. +/// Returns a simplified error message. +fn extract_error_details(error: &str) -> String { + let code_re = Regex::new(r"<Code>(.*?)</Code>").unwrap(); + let message_re = Regex::new(r"<Message>(.*?)</Message>").unwrap(); + + let code = code_re.captures(error).and_then(|caps| caps.get(1)).map(|m| m.as_str()).unwrap_or("UnknownErrorCode"); + + let message = message_re.captures(error).and_then(|caps| caps.get(1)).map(|m| m.as_str()).unwrap_or("Unknown error message"); - #[error("Validation error: {0}")] - Validation(String), + format!("Error [{}]: {}", code, message) } impl actix_web::ResponseError for TimeFusionError { fn error_response(&self) -> actix_web::HttpResponse { - match self { - TimeFusionError::Http(err) => err.error_response(), - _ => actix_web::HttpResponse::InternalServerError().json(serde_json::json!({ - "error": self.to_string() - })), - } + // For database errors, try to extract relevant XML details. + let error_message = match self { + TimeFusionError::Database(err) => { + let err_str = err.to_string(); + if err_str.contains("<Error>") { extract_error_details(&err_str) } else { err_str } + } + _ => self.to_string(), + }; + + actix_web::HttpResponse::InternalServerError().json(serde_json::json!({ + "error": error_message, + })) } } diff --git a/src/main.rs b/src/main.rs index aaa547e..6a38ca7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,12 +23,14 @@ use crate::{ error::{Result, TimeFusionError}, }; +/// Shared application state containing the database. struct AppState { db: Arc<Database>, } struct ShutdownSignal; +/// Request payload for project registration. #[derive(Deserialize)] struct RegisterProjectRequest { project_id: String, @@ -38,6 +40,12 @@ struct RegisterProjectRequest { endpoint: Option<String>, } +/// The /register_project endpoint. +/// +/// When this endpoint is hit, the system calls `Database::register_project`. +/// That method now constructs a full S3 URI (or other object storage URI) from the provided +/// bucket name (if no scheme is provided) along with the credentials and endpoint, then verifies +/// that storage is writable via a dummy write before registering the project. #[tracing::instrument( name = "HTTP /register_project", skip(req, app_state), @@ -66,7 +74,7 @@ async fn register_project(req: web::Json<RegisterProjectRequest>, app_state: web async fn main() -> Result<()> { dotenv().ok(); - // Load configuration. + // Load configuration from environment variables. let config = Config::from_env().expect("Failed to load config"); // Initialize telemetry. @@ -74,10 +82,13 @@ async fn main() -> Result<()> { info!("Starting TimeFusion application"); + // Create the database.
Note: The default project is registered during startup, + // but for user-initiated project registration the provided bucket value is used + // to construct a proper S3 URI (if needed) inside Database::register_project. let db = Database::new(&config).await?; info!("Database initialized successfully"); - // Create a DataFusion session context to be used by both the HTTP server and compaction. + // Create a DataFusion session context for queries and compaction. let session_context = db.create_session_context(); db.setup_session_context(&session_context).expect("Failed to setup session context"); @@ -86,7 +97,7 @@ async fn main() -> Result<()> { let shutdown_token = CancellationToken::new(); let http_shutdown = shutdown_token.clone(); - // Spawn shutdown monitor to flush pending writes. + // Spawn a shutdown monitor to flush pending writes. let db_clone = db.clone(); let shutdown_monitor = shutdown_token.clone(); tokio::spawn(async move { @@ -109,7 +120,7 @@ async fn main() -> Result<()> { } let http_addr = format!("0.0.0.0:{}", config.http_port); - // Clone the Arc for HTTP server. + // Clone the database for the HTTP server. let db_for_http = db.clone(); let http_server = HttpServer::new(move || { App::new() @@ -130,7 +141,7 @@ async fn main() -> Result<()> { } }; - // Spawn periodic compaction background task (every 24 hours). + // Spawn a periodic compaction task (every 24 hours). let db_compaction = db.clone(); let compaction_shutdown = shutdown_token.clone(); let compaction_session = session_context.clone(); From 5de219171d4847b27cec9992df69e1399cb039ac Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Fri, 11 Apr 2025 17:38:51 +0100 Subject: [PATCH 13/14] removed: file pushed --- my_bucket/_delta_log/00000000000000000000.json | 3 --- my_bucket/_delta_log/00000000000000000001.json | 2 -- ...4-4e4c-a31d-2c21962a6cef-c000.snappy.parquet | Bin 23264 -> 0 bytes 3 files changed, 5 deletions(-) delete mode 100644 my_bucket/_delta_log/00000000000000000000.json delete mode 100644 my_bucket/_delta_log/00000000000000000001.json delete mode 100644 my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet diff --git a/my_bucket/_delta_log/00000000000000000000.json b/my_bucket/_delta_log/00000000000000000000.json deleted file mode 100644 index 9f23f0b..0000000 --- a/my_bucket/_delta_log/00000000000000000000.json +++ /dev/null @@ -1,3 +0,0 @@ -{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["timestampNtz"],"writerFeatures":["timestampNtz"]}} 
-{"metaData":{"id":"e202836c-97a3-4138-9638-950d5ca6c6f9","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"observed_timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"id\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"parent_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"kind\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"status_message\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"level\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"severity___severity_text\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"severity___severity_number\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"body\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"duration\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"start_time\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"end_time\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___span_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_state\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___trace_flags\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"context___is_remote\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"events\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"links\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___client___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___client___port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___server___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___server___port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___local__address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___local__port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___peer___address\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___peer__port\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___protocol___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___protocol___version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___transport\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___network___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___file___path\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___function___name\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___code___line___number\",\"type\":\"integer\",\"nullable\":true,\"m
etadata\":{}},{\"name\":\"attributes___code___stacktrace\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___log__record___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___log__record___uid\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___error___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___message\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___exception___stacktrace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___fragment\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___full\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___path\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___query\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___url___scheme\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user_agent___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___method\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___method_original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___response___status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___resend_count\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___http___request___body___size\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___session___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___session___previous___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___system___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___collection___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___namespace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___operation___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___response___status_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___operation___batch___size\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___query___summary\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___db___query___text\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___email\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___full_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"attributes___user___hash\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___version\",\"type\":\"string\",\"nullable
\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___instance___id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___service___namespace\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___language\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___telemetry___sdk___version\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"resource___attributes___user_agent___original\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"project_id\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":["project_id","timestamp"],"createdTime":1744387459169,"configuration":{}}} -{"commitInfo":{"timestamp":1744387459170,"operation":"CREATE TABLE","operationParameters":{"metadata":"{\"configuration\":{},\"createdTime\":1744387459169,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"e202836c-97a3-4138-9638-950d5ca6c6f9\",\"name\":null,\"partitionColumns\":[\"project_id\",\"timestamp\"],\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"observed_timestamp\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}},{\\\"name\\\":\\\"parent_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"kind\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"status_message\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"level\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"severity___severity_text\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"severity___severity_number\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"body\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"duration\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"start_time\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"end_time\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___span_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_state\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___trace_flags\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"context___is_remote\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"events\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"na
me\\\":\\\"links\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___client___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___client___port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___server___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___server___port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___local__address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___local__port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___peer___address\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___peer__port\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___protocol___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___protocol___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___transport\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___network___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___number\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___file___path\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___function___name\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___line___number\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___code___stacktrace\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___log__record___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___log__record___uid\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___error___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___type\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___message\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___exception___stacktrace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___fragment\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___full\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___path\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___query\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___url___scheme\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\
\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user_agent___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___method\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___method_original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___response___status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___resend_count\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___http___request___body___size\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___session___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___session___previous___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___system___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___collection___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___namespace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___operation___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___response___status_code\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___operation___batch___size\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___query___summary\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___db___query___text\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___email\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___full_name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"attributes___user___hash\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___instance___id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___service___namespace\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___telemetry___sdk___language\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___telemetry___sdk___name\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resour
ce___attributes___telemetry___sdk___version\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"resource___attributes___user_agent___original\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"project_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}},{\\\"name\\\":\\\"timestamp\\\",\\\"type\\\":\\\"timestamp_ntz\\\",\\\"nullable\\\":false,\\\"metadata\\\":{}}]}\"}","mode":"ErrorIfExists","protocol":"{\"minReaderVersion\":3,\"minWriterVersion\":7,\"readerFeatures\":[\"timestampNtz\"],\"writerFeatures\":[\"timestampNtz\"]}","location":"file:///Users/ipeluwa/Playground/Apt/timefusion/my_bucket"},"clientVersion":"delta-rs.0.25.0"}} \ No newline at end of file diff --git a/my_bucket/_delta_log/00000000000000000001.json b/my_bucket/_delta_log/00000000000000000001.json deleted file mode 100644 index 70819b7..0000000 --- a/my_bucket/_delta_log/00000000000000000001.json +++ /dev/null @@ -1,2 +0,0 @@ -{"add":{"path":"project_id=test/timestamp=2025-04-11%252016%253A04%253A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet","partitionValues":{"timestamp":"2025-04-11 16:04:19.186152","project_id":"test"},"size":23264,"modificationTime":1744387459220,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"observed_timestamp\":\"2025-04-11T16:04:19.186153Z\",\"duration\":1,\"start_time\":\"2025-04-11T16:04:19.186154Z\",\"context___span_id\":\"dummy_span\",\"id\":\"dummy_test\",\"context___trace_id\":\"dummy_trace\",\"name\":\"dummy\"},\"maxValues\":{\"id\":\"dummy_test\",\"start_time\":\"2025-04-11T16:04:19.186154Z\",\"context___trace_id\":\"dummy_trace\",\"observed_timestamp\":\"2025-04-11T16:04:19.186153Z\",\"name\":\"dummy\",\"context___span_id\":\"dummy_span\",\"duration\":1},\"nullCount\":{\"context___span_id\":0,\"end_time\":1,\"attributes___network___local__address\":1,\"context___trace_id\":0,\"context___trace_state\":1,\"attributes___server___address\":1,\"kind\":1,\"status_message\":1,\"attributes___network___protocol___version\":1,\"context___trace_flags\":1,\"attributes___network___local__port\":1,\"id\":0,\"attributes___client___port\":1,\"name\":0,\"severity___severity_text\":1,\"attributes___network___protocol___name\":1,\"status_code\":1,\"severity___severity_number\":1,\"duration\":0,\"attributes___network___transport\":1,\"start_time\":0,\"links\":1,\"attributes___client___address\":1,\"attributes___network___peer__port\":1,\"context___is_remote\":1,\"attributes___network___peer___address\":1,\"events\":1,\"observed_timestamp\":0,\"level\":1,\"body\":1,\"attributes___server___port\":1,\"parent_id\":1}}","tags":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}} -{"commitInfo":{"timestamp":1744387459221,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"project_id\",\"timestamp\"]"},"operationMetrics":{"execution_time_ms":32,"num_added_files":1,"num_added_rows":1,"num_partitions":0,"num_removed_files":0},"clientVersion":"delta-rs.0.25.0"}} \ No newline at end of file diff --git a/my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet b/my_bucket/project_id=test/timestamp=2025-04-11%2016%3A04%3A19.186152/part-00001-2d3667a5-9934-4e4c-a31d-2c21962a6cef-c000.snappy.parquet deleted file mode 100644 index 777baf52f004c3fc9d08674a83816610e7caf85c..0000000000000000000000000000000000000000 GIT binary patch literal 0 
HcmV?d00001 literal 23264 zcmeHPeP~qIwx3MZrfRKPXPUIBwYKhR{YWxNY|?x0d+*FlX7Xh+nI`k~?z>^WOfs2| zbUrf4{8glg6e%JiMMOkIq=-lnDWQlIxrme^rAUz?A|gKVNs&^d_{v*rpL6z{z0aAG z_#WK%$IZ~2oORaTzqR(-YpuQZnp}O7zty(Pw$`>`z-F~s)>|y~P5=4dw{GootgzTD z_nu3Q3O}uN7At@?**2+QYuZ=B`$Q>|nHwu63q^oj zW~=8QS5~W=uto33M%!l9Q(oy@ z1?nqan2*Mj1}j;mVK-J>oqs2yUUH_Xx& z?mVTP3j;M(IY(%R8HiYVrTW>DC6+89=AI>=_{aD}lfBcHf&;+P*kr$5XWNTsfqer~ zcdfQ|N>Sdm7i@o{)Jo)CU8dz7U9P1baMb(?E$`rqTIx1OU8vXcj=aRwI=_-VURp)R zpbBSK+h(XjcBF2-Y|B?wf4D{q_020<>hrZ)>JCSJwNA_XbiJ0kDN|JRt6IkIGKGxq zHfR|qY?MOAFY+TjX`Afd+8N`>YuXw2IqK^ME$_2NE%mcZA@=##8QLdru*V;&kEfg1 z+u!6zMUQK5Y9Wtp)KZT)>hfD!-l0ue>MloJd|S)gzgbKD#8F?qqvbv0sPEs^@=k8i zQcpPQD$(+ezNe)gaMb)(E$`qqEp?ltF1)Yh?c1)Ue&nb#J17sg;i>$nnD+Wk?WE(o zwA5pcy3(xW9d6N5_c-cOtCn}bp`~td)VVe-?+=c;(XQp4>d;cZRw*c6o(=V^j^^{Z z@>n50k<3JG$LpYNzQx+GJ{Kz_^E1iBSaC7~efnr-ddr4^gX^1iU~AW6TfN27V4X~C zu?#fHq_yFt>1aNgEspVTmt~`wq;}Y{smUxiZxx^yONFs`F2N04qr3<4h3GiPTAohM zB-2{dh623FPZsCK#>UuVF*#e*4tkw&G=s9GOe~q#4qFz>CFax@XpH(qDIYCP=CZPF zR)S*rA~g(W6FM}Rl}*NRR-;AZxhyIK3Kp@cEZAvXm5u#gforRB<;iFp&Lp}Kd(+YJ zg1QP+;MFR|WMM3y%;b246=1<^k(;tSJ(->2-n z(FhU3B|b$XNA^8lY&thS1_2Szm!BM;%tpEN!h`bTOMJp>ZkO`tbbIsB@eG8ucHC>+qLgu^G-o5DF}TrM zoCX7&k4wosOSfnn-h#3)dOR@)#8G?%mmn*Q!6`wrk5wQK>s6?3=2c~@hasv+c8JX_+q62CQkPS7n3&oY!bS>de5 z+qeL^TaGv0mwBUAM@{FGGm|-3!{ebX;yaDIeelYHa0piw*}i0MZ{um<4M*yF0xY1%wW9QVTVIXb_zt_qTDE20~@a2zVWz=4XaoxrVL5z{7-(AB& z6VU>f4hUE}SIR>%1c$V?u)s5ZlD-zbX4}G3435C&g=bA>Auh92CLaXb7oKbAhmKnF zipg{m@=cz`WrEfz(d>9hsBw2Ke!6I**2PaXu+`35v(0j>-P+jH_-v3^nrzD&J1i^K zHX2Bv$z3bCYHiwW zb*_h*&~nE=F|z-NvoC&NBb+Mq#zQ9_sIwocBiCLaM_)i?#J@2bu&<=OdyD-S1iJNs z?L!_2Z%RyqtINolWugWLeK7Gzr=WrMM#bhIT~40r=AXs+^WuEr=EWc$EGO5Oiz-}2 zh&v*Ld_!Y1=U0&9@b_Z8`R?G%{au3cHE(hZ?BI*!u@3ARf}In=Om2J(?)Ho1%8R0I zUkn4>4dE1HapFas_*9*!#sa%R(#diJ9C?Y{!x&J%+^9Zb4CvpY(<(7= zW7uZ5pZHLT0ab|RUaApuekJ+iB@m;2K~jA}G5WU}G51l-JyA@(dc|dixU`CVu}U#S zmmC_;aO#(T686^M4opGL)!8rBk+ZAGLx8bt)qX9*eMdv%)~*E2VLw><#-D6{E@<@k zXe!}Mo&9_rx%DzR@v@Nc6_ekiL0^oOnewx^ zIDu2|ic{rI0h{^NI&x&4ICCCn9{3kQdA{xtI|>Z()Ai)O4)G=;o>vjA%2{C2d0=oi z*ORZ;D+($u`{W~V*&kIf^4=6rZ9DXv5_BCTz2X{LUj-Sn_>u6UB1Ass$1>4Fw}4DH804dhsZ2znDizZ60F`Tf-PEXy z#V~((gIs)r*YpNrJ`gea+oFP5?e{Ri(@kW56AySF0Y7&M?!zyg?{=xZ6^8m-6ZuI+ zWlsMr06TtB#N`#`aiNRGAg{eizI;U^~e%egtH;alrMX=92!XkMO)OITw+L!N;gYSrF zN5X)1K}6$EUBeqUgMIc6xvhh}fUq}3SiX^`YZA)cPX_$`yX3;VqQ2iD@G}wk`MRV` z!pSXU-xg8A1C(&eE7+HBwK3I=I&=*F$rkdX4*t^!z+YP=yiwPc!Cxih3=y?IiSUm_ zc>bJc+N%tQYUnEQK$rSmzEV(Plb4EOdna%G#S z>p8?dC*txskm9oL9>crvJ~{lph<6R~eh~5afTnn2H=4oj+fMH3VDBO9QxSI2U26vZ z<92dsyQuRge+BRpeTr?>%8U;=ozB|sHiJL2gB;kQz$-VF<9~(X;;IU+7S((OM8Snl zIKzCpgWOUvSsm~ds{BwyA4Zl`F4?-BA)zu;jtI_nJT_%8B?4(cI7 zT@s=AIMsIG8QSArKWpdW^$@oL_CFv`v!!i3I}4fqtCz& zw~$|TV3!f>?4rQ1^UuKUwUEEJh??C(u`M zNzyR<4?q2qRF+)hQXjs8-;(5{?|}qbl6sMXSL$y`>TZWOu2B#C3_1bEIX)^$?m~AN zkequYKpKIUaisSk?UO!&@#Sv7o^S$8_b|{>FVYofH;fw(sNQL3>x;qW2YPNJ0;MiUmAk>qi(=+B0VrZ2v0dT;z=!lCxH^Kup1_gz*;3= zc>wWU@EDc~FmxPf_b9GCE#*KS+C=I_x-3m02-wEi?Lw`P-ZKu1aP@+1z+y1SH{1jC zxD)V3r8vBmkS;rc6$c5DYuF3;<7lc3jF3FzXu4sf%eW2RakkVpRJC6MRbe*Z^&mZ1 z22rj!iW-%-0qFux4M{k47_5Y=f&IWA$dFtuANZP^Q4mUzKi#8f1Nq~QBYyyg_VlIW zj;_{tW-uLU?b;g-`CH%4)uu4k95C1SIu6$e-xG%fej>2%3NNr8~LUGi@aFIQ;dLZJ|1doZk|OW;YQGp)5Gj3 zNiE2)U}!k!BB8;VM5r$n34qNUo)%ccbT}Jmf%mx9mPl|m9Sa6p;@LFPm58^9zYF1D zM>Y}kWKmx80`mHXK4@;91D{=39`2bV8kF~xt|v5D4g)Q9r%I;#)#FC~46b&e{favR zbBRn^S#!D+Z}U&Y6hDdvhoubaYhu4LT@S?6bFL?$^@pJm^YuuT8CXx|`PT#f9R#;_ z(fPIg+cSdr5a^IkYQzrgb1objgyaGrZwklP#_Mx02Co!`IH7p`RDNx|Jyib$jy2IQ zlL*chLJnokl6wkv&Ln?GdpaS1cri4*I$v)^zkoIGg;<`525J5o$MLoEk+YZbF}_Xk z(5E2&%cl10bt6A%xL${U67`CQ0%;`a(e?=@cUR5*=!($fktC+DrU 
zKrt5d%q4OBU&#q zKzb6MQ&A7fADm8R0M_lEHMNf?g?#FZI)6~@19>{`?JB`O(p;6mJ%KzHc63c)o&x7SwR0Gsy{t?YU$7AJ72}z9?+cubv zgC8iqQHJ~tX8@e-;PW9zn$D5-(D}9TeN=vA5%|+|kTg;;)z?)){~A@!Gs=hy_Lxls1C<1g7p!{%rHH31 z1vN~2P|Dn1PIn8~ ztN1+j!nz{r?V1WXx}cs)OoRVpksuBEMoiZeM*bWIv_9W(>%n~4Mr*%fB;?P<95e|? z;|t(_Oz~-bq|0ZJPtx&p*F*&NfbIvUySuv!OkI1w-38>=JbyB?G;f6+6Hs$eSC_g> z^OclCKF!ZY=Z}nsz9Z*!xO$l5Ij1NekG|S?Wym)hcrR(lPWo zN2E38ElYV{$aKG3DWBGl2K&)hz!-^S`(RI6q1GRkb}xX}?Tdl`U}>!PA2?T2k&v?m z@=uAz9>hV|QG~mNCk1Cnp$n><+WYGqMLv3tf&J1+FZfOmI|IQd2P=3#!QoPm+hK0c z6!MMhAxEGc?j~iH=iB@pag15GUzzLc4kN#YKSHh1Hv_&7`Ao$F|218|G};CEtj9Q? z6=IGAVD+WpT$+RY1yG{;i>0^CpO zeKwX26vJ>|!0}FB3haYVcrm9Rr!+xxS$MnZZ}c@?snly6zp0qRPq8D}Kq=hD{7^o> zJ??g}2L+}11o4!5FXGFC{iy)_f!z)GX+^33&ak#vTOaK$#C!T?5?-hQ9Q4d^Kuub~oP}HS@dEm{ zQ2o)B^!bn1-#Hg+^Uq)%iMdeiFLY9Tl%(^=?q)dOX#S&Y#UB^59xt5LkZYK&+?Cy? z`OrIpe40D;@zepmm;~el+N&r>;QT}V!$JQP_6Fh@AH8%w-DJ_`vmth_b|q1?eoIjj{Lqw*az}lDjbBpc6g>D*!yaut{&cAE5ErakeYP) zEsEQsvlrIi*$B0N2x`DMoS~AZ0#|91dcJoA`Nrpw7LU`{6|br9fP3-!ItC$6D)pDD z?~vPEUnvEpb`5>Cc$~h{IQnz#{d4vrAA6#bcNXf`Vg>E{5&DnGc@{^0nZ{oldP1c{ z5A0=+8}*|_pBq8&qwPobQT?Gu4g2TlDnZY&U9LaV6+Ba8LFm;DyQa+bb?>2ks_!7w z`u=pXhn|^oeG}-)#Jrv?jgP^Kxjxbe?icptd3}_#5$-@O;Ab>GJwEe(q>u73S2!V` zbFhY&+9Xs!%dYx{R5~kJXfr|v@eh3ZEk<=dt+S+Vh;dVU z!X37`lOB*!N4g~UB%Ceq(nOvsj(q9`UbW|m_kVb4vfs1FFVlWC)MAh;!1riR602TF zUh*m3sT6kOGA-B3YCR=+G_D7aFoilG4GoF~FTqamvR#7jxF{qr=^*%G2|m=H;dggG z7ZFfGz&Hp&HFP*Gjpvx7kA5!@2`VuI{TApWBj8^w4UO=xGQi(g6nEweo0PvO-qg{) c%dx8!{_k4wZ#Vpt{Ac+ur^Vuff7|+h02!!a_y7O^ From 91c6dd82010a7c131f1ac2166ae772a2c3e19733 Mon Sep 17 00:00:00 2001 From: Oluwapeluwa Ibrahim Date: Wed, 16 Apr 2025 10:10:42 +0100 Subject: [PATCH 14/14] Cargo fmt --- src/database.rs | 102 +++++++++++++----------------- src/main.rs | 27 +++----- src/persistent_queue.rs | 127 ++++++++++++++++++-------------------- tests/integration_test.rs | 37 +++-------- 4 files changed, 120 insertions(+), 173 deletions(-) diff --git a/src/database.rs b/src/database.rs index d9ee8b7..d0c1d5a 100644 --- a/src/database.rs +++ b/src/database.rs @@ -2,19 +2,20 @@ use std::{any::Any, collections::HashMap, fmt, sync::Arc}; use arrow_schema::SchemaRef; use async_trait::async_trait; -use datafusion::arrow::array::Array; -use datafusion::catalog::Session; -use datafusion::common::{SchemaExt, not_impl_err}; -use datafusion::execution::{TaskContext, context::SessionContext}; -use datafusion::logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown, dml::InsertOp}; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, - insert::{DataSink, DataSinkExec}, +use datafusion::{ + arrow::array::Array, + catalog::Session, + common::{SchemaExt, not_impl_err}, + execution::{TaskContext, context::SessionContext}, + logical_expr::{BinaryExpr, Expr, Operator, TableProviderFilterPushDown, dml::InsertOp}, + physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, + insert::{DataSink, DataSinkExec}, + }, + scalar::ScalarValue, }; -use datafusion::scalar::ScalarValue; use delta_kernel::arrow::record_batch::RecordBatch; -use deltalake::checkpoints; -use deltalake::{storage::StorageOptions, DeltaOps, DeltaTable, DeltaTableBuilder}; +use deltalake::{DeltaOps, DeltaTable, DeltaTableBuilder, checkpoints, storage::StorageOptions}; use futures::StreamExt; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; @@ -50,8 +51,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); 
info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&config.s3_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -81,8 +81,7 @@ impl Database { let storage_uri = format!("s3://{}/{}/?endpoint={}", config.s3_bucket, config.table_prefix, config.s3_endpoint); info!("Storage URI configured: {}", storage_uri); - let aws_url = Url::parse(&config.s3_endpoint) - .map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; + let aws_url = Url::parse(&config.s3_endpoint).map_err(|e| TimeFusionError::Generic(anyhow::anyhow!("Invalid AWS endpoint URL: {}", e)))?; deltalake::aws::register_handlers(Some(aws_url)); info!("AWS handlers registered"); @@ -165,42 +164,36 @@ impl Database { "public".to_string(), ]; - let batch = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(StringArray::from(names)), - Arc::new(StringArray::from(settings)), - ], - )?; + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(StringArray::from(names)), Arc::new(StringArray::from(settings))])?; ctx.register_batch("pg_settings", batch)?; Ok(()) } #[tracing::instrument(name = "db.register_set_config_udf", skip(self, ctx))] pub fn register_set_config_udf(&self, ctx: &SessionContext) { - use datafusion::arrow::array::{StringArray, StringBuilder}; - use datafusion::arrow::datatypes::DataType; - use datafusion::logical_expr::{create_udf, ColumnarValue, ScalarFunctionImplementation, Volatility}; - - let set_config_fn: ScalarFunctionImplementation = - Arc::new(move |args: &[ColumnarValue]| -> datafusion::error::Result { - let param_value_array = match &args[1] { - ColumnarValue::Array(array) => array - .as_any() - .downcast_ref::() - .expect("set_config second arg must be a StringArray"), - _ => panic!("set_config second arg must be an array"), - }; - let mut builder = StringBuilder::new(); - for i in 0..param_value_array.len() { - if param_value_array.is_null(i) { - builder.append_null(); - } else { - builder.append_value(param_value_array.value(i)); - } + use datafusion::{ + arrow::{ + array::{StringArray, StringBuilder}, + datatypes::DataType, + }, + logical_expr::{ColumnarValue, ScalarFunctionImplementation, Volatility, create_udf}, + }; + + let set_config_fn: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| -> datafusion::error::Result { + let param_value_array = match &args[1] { + ColumnarValue::Array(array) => array.as_any().downcast_ref::().expect("set_config second arg must be a StringArray"), + _ => panic!("set_config second arg must be an array"), + }; + let mut builder = StringBuilder::new(); + for i in 0..param_value_array.len() { + if param_value_array.is_null(i) { + builder.append_null(); + } else { + builder.append_value(param_value_array.value(i)); } - Ok(ColumnarValue::Array(Arc::new(builder.finish()))) - }); + } + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + }); let set_config_udf = create_udf( "set_config", vec![DataType::Utf8, DataType::Utf8, DataType::Boolean], @@ -213,10 +206,7 @@ impl Database { #[tracing::instrument(name = "db.start_pgwire_server", skip(self, session_context, shutdown_token), fields(port))] pub async fn start_pgwire_server( - &self, - session_context: SessionContext, - port: u16, - shutdown_token: 
+        &self, session_context: SessionContext, port: u16, shutdown_token: CancellationToken,
     ) -> Result<tokio::task::JoinHandle<()>> {
         use datafusion_postgres::{DfSessionService, HandlerFactory};
         use tokio::net::TcpListener;
@@ -385,12 +375,7 @@ impl Database {
     #[tracing::instrument(name = "db.register_project", skip(self, bucket, access_key, secret_key, endpoint), fields(project_id))]
     pub async fn register_project(
-        &self,
-        project_id: &str,
-        bucket: &str,
-        access_key: Option<&str>,
-        secret_key: Option<&str>,
-        endpoint: Option<&str>,
+        &self, project_id: &str, bucket: &str, access_key: Option<&str>, secret_key: Option<&str>, endpoint: Option<&str>,
     ) -> Result<()> {
         let full_uri = if bucket.starts_with("s3://") || bucket.starts_with("gs://") {
             bucket.to_string()
@@ -489,8 +474,8 @@ async fn merge_batches_into_new_version(table: DeltaTable, batches: Vec<RecordBatch>,
 #[derive(Debug, Clone)]
 pub struct ProjectRoutingTable {
-    database: Arc<Database>,
-    schema:   SchemaRef,
+    database: Arc<Database>,
+    schema: SchemaRef,
 }
 
 impl ProjectRoutingTable {
@@ -611,7 +596,9 @@ impl TableProvider for ProjectRoutingTable {
         Ok(filter.iter().map(|_| TableProviderFilterPushDown::Inexact).collect())
     }
 
-    async fn scan(&self, state: &dyn Session, projection: Option<&Vec<usize>>, filters: &[Expr], limit: Option<usize>) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
+    async fn scan(
+        &self, state: &dyn Session, projection: Option<&Vec<usize>>, filters: &[Expr], limit: Option<usize>,
+    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
         let project_id = self.extract_project_id_from_filters(filters).unwrap_or_else(|| self.default_project.clone());
         let delta_table = self.database.resolve_table(&project_id).await?;
         let table = delta_table.read().await;
@@ -619,7 +606,6 @@
     }
 }
 
-
 #[cfg(test)]
 mod tests {
     use chrono::{TimeZone, Utc};
diff --git a/src/main.rs b/src/main.rs
index 3a2d368..0de4b3e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,14 +5,14 @@ mod persistent_queue;
 mod telemetry;
 
 use std::{env, sync::Arc};
 
-use actix_web::{middleware::Logger, post, web, App, HttpResponse, HttpServer, Responder};
+use actix_web::{App, HttpResponse, HttpServer, Responder, middleware::Logger, post, web};
 use database::Database;
 use dotenv::dotenv;
 use futures::TryFutureExt;
 use serde::Deserialize;
 use tokio::{
     sync::mpsc,
-    time::{sleep, Duration},
+    time::{Duration, sleep},
 };
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info};
@@ -53,10 +53,7 @@ struct RegisterProjectRequest {
     fields(project_id = %req.project_id)
 )]
 #[post("/register_project")]
-async fn register_project(
-    req: web::Json<RegisterProjectRequest>,
-    app_state: web::Data<AppState>
-) -> Result<HttpResponse> {
+async fn register_project(req: web::Json<RegisterProjectRequest>, app_state: web::Data<AppState>) -> Result<HttpResponse> {
     app_state
         .db
         .register_project(
@@ -97,8 +94,7 @@ async fn main() -> Result<()> {
 
     // Create a DataFusion session context for queries and compaction.
     let session_context = db.create_session_context();
-    db.setup_session_context(&session_context)
-        .expect("Failed to setup session context");
+    db.setup_session_context(&session_context).expect("Failed to setup session context");
 
     let db = Arc::new(db);
     let (shutdown_tx, _shutdown_rx) = mpsc::channel::<ShutdownSignal>(1);
@@ -121,22 +117,15 @@
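As an aside, the set_config UDF registered in Database::register_set_config_udf is what lets PGWire clients run the usual Postgres session-setup call. A quick illustrative check, e.g. with tokio-postgres; host, port, and user here are assumptions, not values from the patch:

// Editor's sketch: exercise the set_config UDF over PGWire.
let (client, connection) = tokio_postgres::connect("host=127.0.0.1 port=5432 user=postgres", tokio_postgres::NoTls).await?;
// The Connection future drives the socket; run it on its own task.
tokio::spawn(connection);
// Served by the registered UDF instead of failing as an unknown function.
client.simple_query("SELECT set_config('search_path', 'public', false)").await?;

     // Determine PGWire server port: check for a PGWIRE_PORT environment variable,
     // falling back to the port from the configuration.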
-    let pgwire_port = env::var("PGWIRE_PORT")
-        .ok()
-        .and_then(|port_str| port_str.parse::<u16>().ok())
-        .unwrap_or(config.pg_port);
+    let pgwire_port = env::var("PGWIRE_PORT").ok().and_then(|port_str| port_str.parse::<u16>().ok()).unwrap_or(config.pg_port);
     info!("Starting PGWire server on port: {}", pgwire_port);
 
-    let pg_server = db
-        .start_pgwire_server(session_context.clone(), pgwire_port, shutdown_token.clone())
-        .await?;
-
+    let pg_server = db.start_pgwire_server(session_context.clone(), pgwire_port, shutdown_token.clone()).await?;
+
     sleep(Duration::from_secs(1)).await;
     if pg_server.is_finished() {
         error!("PGWire server failed to start, aborting...");
-        return Err(TimeFusionError::Generic(anyhow::anyhow!(
-            "PGWire server failed to start"
-        )));
+        return Err(TimeFusionError::Generic(anyhow::anyhow!("PGWire server failed to start")));
     }
 
     let http_addr = format!("0.0.0.0:{}", config.http_port);
diff --git a/src/persistent_queue.rs b/src/persistent_queue.rs
index 891fcd0..d679029 100644
--- a/src/persistent_queue.rs
+++ b/src/persistent_queue.rs
@@ -1,10 +1,9 @@
-use std::str::FromStr;
-use std::sync::Arc;
+use std::{str::FromStr, sync::Arc};
 
 use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef, TimeUnit};
 use delta_kernel::schema::StructField;
 use log::debug;
-use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize};
+use serde::{Deserialize, Deserializer, Serialize, de::Error as DeError};
 use serde_arrow::schema::{SchemaLike, TracingOptions};
 use serde_json::json;
 use serde_with::serde_as;
@@ -20,12 +19,12 @@ pub struct OtelLogsAndSpans {
     pub observed_timestamp: Option<DateTime<Utc>>,
 
     // Identification and log details.
-    pub id:             String,
-    pub parent_id:      Option<String>,
-    pub hashes:         Vec<String>, // all relevant hashes can be stored here for item identification
-    pub name:           Option<String>,
-    pub kind:           Option<String>, // logs, span, request
-    pub status_code:    Option<String>,
+    pub id: String,
+    pub parent_id: Option<String>,
+    pub hashes: Vec<String>, // all relevant hashes can be stored here for item identification
+    pub name: Option<String>,
+    pub kind: Option<String>, // logs, span, request
+    pub status_code: Option<String>,
     pub status_message: Option<String>,
 
     // Logs specific – using master branch's expanded fields.
@@ -34,7 +33,7 @@
     // Severity: added in master.
     pub severity: Option<String>, // severity as json
-    pub severity___severity_text:   Option<String>,
+    pub severity___severity_text: Option<String>,
     pub severity___severity_number: Option<String>,
 
     pub body: Option<String>, // body as json
@@ -44,105 +43,105 @@
     #[serde(with = "chrono::serde::ts_microseconds_option")]
     pub start_time: Option<DateTime<Utc>>,
     #[serde(with = "chrono::serde::ts_microseconds_option")]
-    pub end_time:   Option<DateTime<Utc>>,
+    pub end_time: Option<DateTime<Utc>>,
 
     // Context: master adds a JSON context field.
-    pub context:               Option<String>, // context as json
-    pub context___trace_id:    Option<String>,
-    pub context___span_id:     Option<String>,
-    pub context___trace_state: Option<String>,
-    pub context___trace_flags: Option<String>,
-    pub context___is_remote:   Option<String>,
+    pub context: Option<String>, // context as json
+    pub context___trace_id: Option<String>,
+    pub context___span_id: Option<String>,
+    pub context___trace_state: Option<String>,
+    pub context___trace_flags: Option<String>,
+    pub context___is_remote: Option<String>,
 
     // Events and Links.
     pub events: Option<String>, // events json
-    pub links:  Option<String>, // links json
+    pub links: Option<String>, // links json
 
     // Attributes.
-    pub attributes: Option<String>,  // attributes object as json
+    pub attributes: Option<String>, // attributes object as json
 
     // Server and client addresses.
     pub attributes___client___address: Option<String>,
-    pub attributes___client___port:    Option<i32>,
+    pub attributes___client___port: Option<i32>,
     pub attributes___server___address: Option<String>,
-    pub attributes___server___port:    Option<i32>,
+    pub attributes___server___port: Option<i32>,
 
     // Network attributes.
-    pub attributes___network___local__address:     Option<String>,
-    pub attributes___network___local__port:        Option<i32>,
-    pub attributes___network___peer___address:     Option<String>,
-    pub attributes___network___peer__port:         Option<i32>,
-    pub attributes___network___protocol___name:    Option<String>,
-    pub attributes___network___protocol___version: Option<String>,
-    pub attributes___network___transport:          Option<String>,
-    pub attributes___network___type:               Option<String>,
+    pub attributes___network___local__address: Option<String>,
+    pub attributes___network___local__port: Option<i32>,
+    pub attributes___network___peer___address: Option<String>,
+    pub attributes___network___peer__port: Option<i32>,
+    pub attributes___network___protocol___name: Option<String>,
+    pub attributes___network___protocol___version: Option<String>,
+    pub attributes___network___transport: Option<String>,
+    pub attributes___network___type: Option<String>,
 
     // Source Code Attributes.
-    pub attributes___code___number:          Option<i32>,
-    pub attributes___code___file___path:     Option<i32>,
-    pub attributes___code___function___name: Option<i32>,
-    pub attributes___code___line___number:   Option<i32>,
-    pub attributes___code___stacktrace:      Option<i32>,
+    pub attributes___code___number: Option<i32>,
+    pub attributes___code___file___path: Option<i32>,
+    pub attributes___code___function___name: Option<i32>,
+    pub attributes___code___line___number: Option<i32>,
+    pub attributes___code___stacktrace: Option<i32>,
 
     // Log records.
     pub attributes___log__record___original: Option<String>,
-    pub attributes___log__record___uid:      Option<String>,
+    pub attributes___log__record___uid: Option<String>,
 
     // Exception Attributes.
-    pub attributes___error___type:        Option<String>,
-    pub attributes___exception___type:    Option<String>,
-    pub attributes___exception___message: Option<String>,
+    pub attributes___error___type: Option<String>,
+    pub attributes___exception___type: Option<String>,
+    pub attributes___exception___message: Option<String>,
     pub attributes___exception___stacktrace: Option<String>,
 
     // URL Attributes.
     pub attributes___url___fragment: Option<String>,
-    pub attributes___url___full:     Option<String>,
-    pub attributes___url___path:     Option<String>,
-    pub attributes___url___query:    Option<String>,
-    pub attributes___url___scheme:   Option<String>,
+    pub attributes___url___full: Option<String>,
+    pub attributes___url___path: Option<String>,
+    pub attributes___url___query: Option<String>,
+    pub attributes___url___scheme: Option<String>,
 
     // Useragent.
     pub attributes___user_agent___original: Option<String>,
 
     // HTTP Attributes.
-    pub attributes___http___request___method:          Option<String>,
-    pub attributes___http___request___method_original: Option<String>,
-    pub attributes___http___response___status_code:    Option<String>,
-    pub attributes___http___request___resend_count:    Option<String>,
-    pub attributes___http___request___body___size:     Option<String>,
+    pub attributes___http___request___method: Option<String>,
+    pub attributes___http___request___method_original: Option<String>,
+    pub attributes___http___response___status_code: Option<String>,
+    pub attributes___http___request___resend_count: Option<String>,
+    pub attributes___http___request___body___size: Option<String>,
 
     // Session Attributes.
-    pub attributes___session___id:            Option<String>,
+    pub attributes___session___id: Option<String>,
     pub attributes___session___previous___id: Option<String>,
 
     // Database Attributes.
-    pub attributes___db___system___name:          Option<String>,
-    pub attributes___db___collection___name:      Option<String>,
-    pub attributes___db___namespace:              Option<String>,
-    pub attributes___db___operation___name:       Option<String>,
-    pub attributes___db___response___status_code: Option<String>,
+    pub attributes___db___system___name: Option<String>,
+    pub attributes___db___collection___name: Option<String>,
+    pub attributes___db___namespace: Option<String>,
+    pub attributes___db___operation___name: Option<String>,
+    pub attributes___db___response___status_code: Option<String>,
     pub attributes___db___operation___batch___size: Option<i32>,
-    pub attributes___db___query___summary: Option<String>,
-    pub attributes___db___query___text:    Option<String>,
+    pub attributes___db___query___summary: Option<String>,
+    pub attributes___db___query___text: Option<String>,
 
     // User Attributes.
-    pub attributes___user___id:    Option<String>,
-    pub attributes___user___email: Option<String>,
+    pub attributes___user___id: Option<String>,
+    pub attributes___user___email: Option<String>,
     pub attributes___user___full_name: Option<String>,
-    pub attributes___user___name: Option<String>,
-    pub attributes___user___hash: Option<String>,
+    pub attributes___user___name: Option<String>,
+    pub attributes___user___hash: Option<String>,
 
     // Resource data.
     pub resource: Option<String>, // resource as json
 
     // Resource Attributes using master branch naming.
-    pub resource___service___name:    Option<String>,
-    pub resource___service___version: Option<String>,
+    pub resource___service___name: Option<String>,
+    pub resource___service___version: Option<String>,
     pub resource___service___instance___id: Option<String>,
-    pub resource___service___namespace: Option<String>,
+    pub resource___service___namespace: Option<String>,
     pub resource___telemetry___sdk___language: Option<String>,
-    pub resource___telemetry___sdk___name:    Option<String>,
-    pub resource___telemetry___sdk___version: Option<String>,
+    pub resource___telemetry___sdk___name: Option<String>,
+    pub resource___telemetry___sdk___version: Option<String>,
     pub resource___user_agent___original: Option<String>,
@@ -192,10 +191,7 @@ impl OtelLogsAndSpans {
     pub fn columns() -> anyhow::Result<Vec<StructField>> {
         let fields = OtelLogsAndSpans::fields()?;
-        let vec_refs: Vec<StructField> = fields
-            .iter()
-            .map(|arc_field| arc_field.as_ref().try_into().unwrap())
-            .collect();
+        let vec_refs: Vec<StructField> = fields.iter().map(|arc_field| arc_field.as_ref().try_into().unwrap()).collect();
         assert_eq!(fields[fields.len() - 2].data_type(), &DataType::Utf8);
         assert_eq!(fields[fields.len() - 1].data_type(), &DataType::Date32);
         debug!("schema_field columns {:?}", vec_refs);
@@ -207,10 +203,7 @@
             log::error!("Failed to get columns: {:?}", e);
             Vec::new()
         });
-        let arrow_fields: Vec<Field> = columns
-            .iter()
-            .filter_map(|sf| sf.try_into().ok())
-            .collect();
+        let arrow_fields: Vec<Field> = columns.iter().filter_map(|sf| sf.try_into().ok()).collect();
 
         Arc::new(Schema::new(arrow_fields))
     }
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 431a3dc..855a95e 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -68,16 +68,10 @@ async fn start_test_server() -> Result<(Arc<Notify>, String, u16)> {
         let session_context = db.create_session_context();
         db.setup_session_context(&session_context).expect("Failed to setup session context");
 
-        let port = std::env::var("PGWIRE_PORT")
-            .expect("PGWIRE_PORT not set")
-            .parse::<u16>()
-            .expect("Invalid PGWIRE_PORT");
+        let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::<u16>().expect("Invalid PGWIRE_PORT");
         let shutdown_token = CancellationToken::new();
-        let pg_server = db
-            .start_pgwire_server(session_context, port, shutdown_token.clone())
-            .await
-            .expect("Failed to start PGWire server");
+        let pg_server = db.start_pgwire_server(session_context, port, shutdown_token.clone()).await.expect("Failed to start PGWire server");
         shutdown_signal_clone.notified().await;
         shutdown_token.cancel();
@@ -85,10 +79,7 @@ async fn start_test_server() -> Result<(Arc<Notify>, String, u16)> {
     });
 
     // Increase retry timeout to 10 seconds.
-    let port = std::env::var("PGWIRE_PORT")
-        .expect("PGWIRE_PORT not set")
-        .parse::<u16>()
-        .expect("Invalid PGWIRE_PORT");
+    let port = std::env::var("PGWIRE_PORT").expect("PGWIRE_PORT not set").parse::<u16>().expect("Invalid PGWIRE_PORT");
     let _ = connect_with_retry(port, Duration::from_secs(10)).await?;
     Ok((shutdown_signal, test_id, port))
 }
@@ -153,9 +144,7 @@ async fn test_postgres_integration() -> Result<()> {
     let shutdown = || shutdown_signal.notify_one();
     let shutdown_guard = scopeguard::guard((), |_| shutdown());
 
-    let (client, _) = connect_with_retry(port, Duration::from_secs(3))
-        .await
-        .expect("Failed to connect to PostgreSQL");
+    let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL");
 
     // Use an insert query that includes extra columns ("date" and "hashes") as per the master branch.
     let timestamp_str = format!("'{}'", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S"));
@@ -244,9 +233,7 @@ async fn test_concurrent_postgres_requests() -> Result<()> {
     let mut handles = Vec::with_capacity(num_clients);
 
     for i in 0..num_clients {
-        let (client, _) = connect_with_retry(port, Duration::from_secs(3))
-            .await
-            .expect("Failed to connect to PostgreSQL");
+        let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL");
         let insert_query = insert_query.clone();
         let inserted_ids_clone = Arc::clone(&inserted_ids);
         let test_id_prefix = format!("{}-client-{}", test_id, i);
@@ -300,9 +287,7 @@
         handle.await.expect("Task should complete successfully");
     }
 
-    let (client, _) = connect_with_retry(port, Duration::from_secs(3))
-        .await
-        .expect("Failed to connect to PostgreSQL");
+    let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL");
 
     let count_rows = client
         .query(&format!("SELECT COUNT(*) FROM otel_logs_and_spans WHERE id LIKE '{}%'", test_id), &[])
@@ -332,9 +317,7 @@
     let mut query_handles = Vec::with_capacity(num_query_clients);
     let query_times = Arc::new(Mutex::new(Vec::new()));
     for _ in 0..num_query_clients {
-        let (client, _) = connect_with_retry(port, Duration::from_secs(3))
-            .await
-            .expect("Failed to connect to PostgreSQL");
+        let (client, _) = connect_with_retry(port, Duration::from_secs(3)).await.expect("Failed to connect to PostgreSQL");
         let test_id = test_id.clone();
         let query_times = Arc::clone(&query_times);
         let handle = tokio::spawn(async move {
@@ -376,11 +359,7 @@
     }
     let times = query_times.lock().unwrap();
     let total_time: Duration = times.iter().sum();
-    let avg_time = if times.is_empty() {
-        Duration::new(0, 0)
-    } else {
-        total_time / times.len() as u32
-    };
+    let avg_time = if times.is_empty() { Duration::new(0, 0) } else { total_time / times.len() as u32 };
     println!("Average query execution time per client: {:?}", avg_time);
 
     std::mem::drop(shutdown_guard);