
Commit 0da46fa

feat: added internal stream "meta" (#801)
This PR adds an internal stream, "meta", used in distributed mode only. The query node creates this internal stream, fetches cluster metrics from all ingestion nodes, adds the metrics data to an event, and ingests that event into the internal stream. The console uses this data on the cluster page to show stats per ingestor.
1 parent ad7f67e commit 0da46fa

File tree: 9 files changed, +359 -80 lines
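Before reading the diffs, a condensed, hedged sketch of the one-minute tick the new scheduler performs. The function name `cluster_metrics_tick` is hypothetical; the snippet assumes it sits alongside `fetch_cluster_metrics`, `ingest_internal_stream`, and `INTERNAL_STREAM_NAME` introduced below, and that `PostError` converts from `serde_json::Error` (as the diff's use of `?` on `serde_json::from_slice` implies). The real scheduling, logging, and error handling are in the diffs.

```rust
// Hypothetical helper condensing what the clokwerk task in
// server/src/handlers/http/cluster/mod.rs does once per minute.
async fn cluster_metrics_tick() -> Result<(), PostError> {
    // Scrape and parse the Prometheus /metrics endpoint of every known ingestor.
    let metrics = fetch_cluster_metrics().await?;
    if !metrics.is_empty() {
        // Serialize the per-ingestor metrics and ingest them as one JSON event
        // into the internal "meta" stream, bypassing the public ingest API.
        let payload = serde_json::to_vec(&metrics)?;
        ingest_internal_stream(
            INTERNAL_STREAM_NAME.to_string(),
            bytes::Bytes::from(payload),
        )
        .await?;
    }
    Ok(())
}
```

In the commit itself this logic runs inside an `AsyncScheduler` task that the query server spawns at startup.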

server/src/event/writer.rs

Lines changed: 4 additions & 3 deletions

@@ -26,6 +26,7 @@ use std::{
 };
 
 use crate::{
+    handlers::http::cluster::INTERNAL_STREAM_NAME,
     option::{Mode, CONFIG},
     utils,
 };
@@ -132,7 +133,7 @@ impl WriterTable {
         parsed_timestamp: NaiveDateTime,
         custom_partition_values: &HashMap<String, String>,
     ) -> Result<(), StreamWriterError> {
-        if CONFIG.parseable.mode != Mode::Query {
+        if CONFIG.parseable.mode != Mode::Query || stream_name == INTERNAL_STREAM_NAME {
             stream_writer.lock().unwrap().push(
                 stream_name,
                 schema_key,
@@ -161,7 +162,7 @@ impl WriterTable {
     ) -> Result<(), StreamWriterError> {
         match map.get(stream_name) {
             Some(writer) => {
-                if CONFIG.parseable.mode != Mode::Query {
+                if CONFIG.parseable.mode != Mode::Query || stream_name == INTERNAL_STREAM_NAME {
                     writer.lock().unwrap().push(
                         stream_name,
                         schema_key,
@@ -174,7 +175,7 @@ impl WriterTable {
                 }
             }
             None => {
-                if CONFIG.parseable.mode != Mode::Query {
+                if CONFIG.parseable.mode != Mode::Query || stream_name == INTERNAL_STREAM_NAME {
                     let mut writer = Writer::default();
                     writer.push(
                         stream_name,

server/src/handlers/http/cluster/mod.rs

Lines changed: 113 additions & 43 deletions

@@ -21,7 +21,7 @@ pub mod utils;
 use crate::handlers::http::cluster::utils::{
     check_liveness, to_url_string, IngestionStats, QueriedStats,
 };
-use crate::handlers::http::ingest::PostError;
+use crate::handlers::http::ingest::{ingest_internal_stream, PostError};
 use crate::handlers::http::logstream::error::StreamError;
 use crate::handlers::{STATIC_SCHEMA_FLAG, TIME_PARTITION_KEY};
 use crate::option::CONFIG;
@@ -46,8 +46,13 @@ type IngestorMetadataArr = Vec<IngestorMetadata>;
 use self::utils::StorageStats;
 
 use super::base_path_without_preceding_slash;
+use std::time::Duration;
 
 use super::modal::IngestorMetadata;
+use clokwerk::{AsyncScheduler, Interval};
+pub const INTERNAL_STREAM_NAME: &str = "meta";
+
+const CLUSTER_METRICS_INTERVAL_SECONDS: Interval = clokwerk::Interval::Minutes(1);
 
 pub async fn sync_cache_with_ingestors(
     url: &str,
@@ -432,50 +437,11 @@ pub async fn get_cluster_info() -> Result<impl Responder, StreamError> {
 }
 
 pub async fn get_cluster_metrics() -> Result<impl Responder, PostError> {
-    let ingestor_metadata = get_ingestor_info().await.map_err(|err| {
-        log::error!("Fatal: failed to get ingestor info: {:?}", err);
-        PostError::Invalid(err)
+    let dresses = fetch_cluster_metrics().await.map_err(|err| {
+        log::error!("Fatal: failed to fetch cluster metrics: {:?}", err);
+        PostError::Invalid(err.into())
     })?;
 
-    let mut dresses = vec![];
-
-    for ingestor in ingestor_metadata {
-        let uri = Url::parse(&format!(
-            "{}{}/metrics",
-            &ingestor.domain_name,
-            base_path_without_preceding_slash()
-        ))
-        .map_err(|err| {
-            PostError::Invalid(anyhow::anyhow!("Invalid URL in Ingestor Metadata: {}", err))
-        })?;
-
-        let res = reqwest::Client::new()
-            .get(uri)
-            .header(header::CONTENT_TYPE, "application/json")
-            .send()
-            .await;
-
-        if let Ok(res) = res {
-            let text = res.text().await.map_err(PostError::NetworkError)?;
-            let lines: Vec<Result<String, std::io::Error>> =
-                text.lines().map(|line| Ok(line.to_owned())).collect_vec();
-
-            let sample = prometheus_parse::Scrape::parse(lines.into_iter())
-                .map_err(|err| PostError::CustomError(err.to_string()))?
-                .samples;
-
-            dresses.push(Metrics::from_prometheus_samples(
-                sample,
-                ingestor.domain_name,
-            ));
-        } else {
-            log::warn!(
-                "Failed to fetch metrics from ingestor: {}\n",
-                ingestor.domain_name,
-            );
-        }
-    }
-
     Ok(actix_web::HttpResponse::Ok().json(dresses))
 }
 
@@ -545,3 +511,107 @@ pub async fn remove_ingestor(req: HttpRequest) -> Result<impl Responder, PostErr
     log::info!("{}", &msg);
     Ok((msg, StatusCode::OK))
 }
+
+async fn fetch_cluster_metrics() -> Result<Vec<Metrics>, PostError> {
+    let ingestor_metadata = get_ingestor_info().await.map_err(|err| {
+        log::error!("Fatal: failed to get ingestor info: {:?}", err);
+        PostError::Invalid(err)
+    })?;
+
+    let mut dresses = vec![];
+
+    for ingestor in ingestor_metadata {
+        let uri = Url::parse(&format!(
+            "{}{}/metrics",
+            &ingestor.domain_name,
+            base_path_without_preceding_slash()
+        ))
+        .map_err(|err| {
+            PostError::Invalid(anyhow::anyhow!("Invalid URL in Ingestor Metadata: {}", err))
+        })?;
+
+        let res = reqwest::Client::new()
+            .get(uri)
+            .header(header::CONTENT_TYPE, "application/json")
+            .send()
+            .await;
+
+        if let Ok(res) = res {
+            let text = res.text().await.map_err(PostError::NetworkError)?;
+            let lines: Vec<Result<String, std::io::Error>> =
+                text.lines().map(|line| Ok(line.to_owned())).collect_vec();
+
+            let sample = prometheus_parse::Scrape::parse(lines.into_iter())
+                .map_err(|err| PostError::CustomError(err.to_string()))?
+                .samples;
+            let ingestor_metrics = Metrics::from_prometheus_samples(sample, &ingestor)
+                .await
+                .map_err(|err| {
+                    log::error!("Fatal: failed to get ingestor metrics: {:?}", err);
+                    PostError::Invalid(err.into())
+                })?;
+            dresses.push(ingestor_metrics);
+        } else {
+            log::warn!(
+                "Failed to fetch metrics from ingestor: {}\n",
+                &ingestor.domain_name,
+            );
+        }
+    }
+    Ok(dresses)
+}
+
+pub fn init_cluster_metrics_schedular() -> Result<(), PostError> {
+    log::info!("Setting up schedular for cluster metrics ingestion");
+
+    let mut scheduler = AsyncScheduler::new();
+    scheduler
+        .every(CLUSTER_METRICS_INTERVAL_SECONDS)
+        .run(move || async {
+            let result: Result<(), PostError> = async {
+                let cluster_metrics = fetch_cluster_metrics().await;
+                if let Ok(metrics) = cluster_metrics {
+                    if !metrics.is_empty() {
+                        log::info!("Cluster metrics fetched successfully from all ingestors");
+                        if let Ok(metrics_bytes) = serde_json::to_vec(&metrics) {
+                            let stream_name = INTERNAL_STREAM_NAME;
+
+                            if matches!(
+                                ingest_internal_stream(
+                                    stream_name.to_string(),
+                                    bytes::Bytes::from(metrics_bytes),
+                                )
+                                .await,
+                                Ok(())
+                            ) {
+                                log::info!(
+                                    "Cluster metrics successfully ingested into internal stream"
+                                );
+                            } else {
+                                log::error!(
+                                    "Failed to ingest cluster metrics into internal stream"
+                                );
+                            }
+                        } else {
+                            log::error!("Failed to serialize cluster metrics");
+                        }
+                    }
+                }
+                Ok(())
+            }
+            .await;
+
+            if let Err(err) = result {
+                log::error!("Error in cluster metrics scheduler: {:?}", err);
+            }
+        });
+
+    tokio::spawn(async move {
+        loop {
+            scheduler.run_pending().await;
+            tokio::time::sleep(Duration::from_secs(10)).await;
+        }
+    });
+
+    Ok(())
+}

server/src/handlers/http/ingest.rs

Lines changed: 47 additions & 1 deletion

@@ -16,6 +16,7 @@
  *
  */
 
+use super::cluster::INTERNAL_STREAM_NAME;
 use super::logstream::error::CreateStreamError;
 use super::{kinesis, otel};
 use crate::event::{
@@ -52,6 +53,12 @@ pub async fn ingest(req: HttpRequest, body: Bytes) -> Result<HttpResponse, PostE
         .find(|&(key, _)| key == STREAM_NAME_HEADER_KEY)
     {
         let stream_name = stream_name.to_str().unwrap().to_owned();
+        if stream_name.eq(INTERNAL_STREAM_NAME) {
+            return Err(PostError::Invalid(anyhow::anyhow!(
+                "Stream {} is an internal stream and cannot be ingested into",
+                stream_name
+            )));
+        }
         create_stream_if_not_exists(&stream_name).await?;
 
         flatten_and_push_logs(req, body, stream_name).await?;
@@ -61,6 +68,40 @@ pub async fn ingest(req: HttpRequest, body: Bytes) -> Result<HttpResponse, PostE
     }
 }
 
+pub async fn ingest_internal_stream(stream_name: String, body: Bytes) -> Result<(), PostError> {
+    create_stream_if_not_exists(&stream_name).await?;
+    let size: usize = body.len();
+    let parsed_timestamp = Utc::now().naive_utc();
+    let (rb, is_first) = {
+        let body_val: Value = serde_json::from_slice(&body)?;
+        let hash_map = STREAM_INFO.read().unwrap();
+        let schema = hash_map
+            .get(&stream_name)
+            .ok_or(PostError::StreamNotFound(stream_name.clone()))?
+            .schema
+            .clone();
+        let event = format::json::Event {
+            data: body_val,
+            tags: String::default(),
+            metadata: String::default(),
+        };
+        event.into_recordbatch(schema, None, None)?
+    };
+    event::Event {
+        rb,
+        stream_name,
+        origin_format: "json",
+        origin_size: size as u64,
+        is_first_event: is_first,
+        parsed_timestamp,
+        time_partition: None,
+        custom_partition_values: HashMap::new(),
+    }
+    .process()
+    .await?;
+    Ok(())
+}
+
 async fn flatten_and_push_logs(
     req: HttpRequest,
     body: Bytes,
@@ -93,7 +134,12 @@ async fn flatten_and_push_logs(
 // fails if the logstream does not exist
 pub async fn post_event(req: HttpRequest, body: Bytes) -> Result<HttpResponse, PostError> {
     let stream_name: String = req.match_info().get("logstream").unwrap().parse().unwrap();
-
+    if stream_name.eq(INTERNAL_STREAM_NAME) {
+        return Err(PostError::Invalid(anyhow::anyhow!(
+            "Stream {} is an internal stream and cannot be ingested into",
+            stream_name
+        )));
+    }
     flatten_and_push_logs(req, body, stream_name).await?;
     Ok(HttpResponse::Ok().finish())
 }

server/src/handlers/http/logstream.rs

Lines changed: 4 additions & 2 deletions

@@ -18,8 +18,8 @@
 
 use self::error::{CreateStreamError, StreamError};
 use super::base_path_without_preceding_slash;
-use super::cluster::fetch_stats_from_ingestors;
 use super::cluster::utils::{merge_quried_stats, IngestionStats, QueriedStats, StorageStats};
+use super::cluster::{fetch_stats_from_ingestors, INTERNAL_STREAM_NAME};
 use crate::alerts::Alerts;
 use crate::handlers::{
     CUSTOM_PARTITION_KEY, STATIC_SCHEMA_FLAG, TIME_PARTITION_KEY, TIME_PARTITION_LIMIT_KEY,
@@ -591,7 +591,9 @@ pub async fn create_stream(
     schema: Arc<Schema>,
 ) -> Result<(), CreateStreamError> {
     // fail to proceed if invalid stream name
-    validator::stream_name(&stream_name)?;
+    if stream_name.ne(INTERNAL_STREAM_NAME) {
+        validator::stream_name(&stream_name)?;
+    }
 
     // Proceed to create log stream if it doesn't exist
     let storage = CONFIG.storage().get_object_store();

server/src/handlers/http/modal/ingest_server.rs

Lines changed: 3 additions & 0 deletions

@@ -24,6 +24,7 @@ use crate::handlers::http::middleware::RouteExt;
 use crate::localcache::LocalCacheManager;
 use crate::metadata;
 use crate::metrics;
+use crate::migration;
 use crate::migration::metadata_migration::migrate_ingester_metadata;
 use crate::rbac;
 use crate::rbac::role::Action;
@@ -328,6 +329,8 @@ impl IngestServer {
         let prometheus = metrics::build_metrics_handler();
         CONFIG.storage().register_store_metrics(&prometheus);
 
+        migration::run_migration(&CONFIG).await?;
+
         let storage = CONFIG.storage().get_object_store();
         if let Err(err) = metadata::STREAM_INFO.load(&*storage).await {
             log::warn!("could not populate local metadata. {:?}", err);

server/src/handlers/http/modal/query_server.rs

Lines changed: 35 additions & 6 deletions

@@ -17,11 +17,12 @@
  */
 
 use crate::handlers::airplane;
-use crate::handlers::http::cluster;
+use crate::handlers::http::cluster::{self, init_cluster_metrics_schedular};
 use crate::handlers::http::middleware::RouteExt;
 use crate::handlers::http::{base_path, cross_origin_config, API_BASE_PATH, API_VERSION};
 
 use crate::rbac::role::Action;
+use crate::sync;
 use crate::{analytics, banner, metadata, metrics, migration, rbac, storage};
 use actix_web::web;
 use actix_web::web::ServiceConfig;
@@ -185,11 +186,39 @@ impl QueryServer {
             analytics::init_analytics_scheduler()?;
         }
 
-        tokio::spawn(airplane::server());
-
-        self.start(prometheus, CONFIG.parseable.openid.clone())
-            .await?;
+        if matches!(init_cluster_metrics_schedular(), Ok(())) {
+            log::info!("Cluster metrics scheduler started successfully");
+        }
+        let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync();
+        let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) =
+            sync::object_store_sync();
 
-        Ok(())
+        tokio::spawn(airplane::server());
+        let app = self.start(prometheus, CONFIG.parseable.openid.clone());
+
+        tokio::pin!(app);
+        loop {
+            tokio::select! {
+                e = &mut app => {
+                    // actix server finished .. stop other threads and stop the server
+                    remote_sync_inbox.send(()).unwrap_or(());
+                    localsync_inbox.send(()).unwrap_or(());
+                    localsync_handler.join().unwrap_or(());
+                    remote_sync_handler.join().unwrap_or(());
+                    return e
+                },
+                _ = &mut localsync_outbox => {
+                    // crash the server if localsync fails for any reason
+                    // panic!("Local Sync thread died. Server will fail now!")
+                    return Err(anyhow::Error::msg("Failed to sync local data to drive. Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable"))
+                },
+                _ = &mut remote_sync_outbox => {
+                    // remote_sync failed, this is recoverable by just starting remote_sync thread again
+                    remote_sync_handler.join().unwrap_or(());
+                    (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync();
+                }
+
+            };
+        }
     }
 }
