Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(proof-data-handler): exclude batches without object file in GCS #2980

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions core/lib/dal/doc/TeeProofGenerationDal.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
title: Status Diagram
---
stateDiagram-v2
[*] --> unpicked : insert_tee_proof_generation_job
unpicked --> picked_by_prover : lock_batch_for_proving
[*] --> picked_by_prover : lock
picked_by_prover --> generated : save_proof_artifacts_metadata
picked_by_prover --> unpicked : unlock_batch
picked_by_prover --> permanently_ignored : unlock_batch
picked_by_prover --> failed : unlock_batch
failed --> picked_by_prover : lock
permanently_ignored --> [*]
generated --> [*]
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- There were manually added tee_proof_generation_details entries with status 'permanently_ignore'.

UPDATE tee_proof_generation_details SET status = 'permanently_ignored' WHERE status = 'permanently_ignore';

-- Entries with the status 'unpicked' were not used at all after the migration to the logic
-- introduced in https://github.com/matter-labs/zksync-era/pull/3017. This was overlooked.

DELETE FROM tee_proof_generation_details WHERE status = 'unpicked';
20 changes: 19 additions & 1 deletion core/lib/dal/src/models/storage_tee_proof.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use chrono::NaiveDateTime;
use chrono::{DateTime, NaiveDateTime, Utc};
use zksync_types::L1BatchNumber;

use crate::tee_proof_generation_dal::LockedBatch;

#[derive(Debug, Clone, sqlx::FromRow)]
pub struct StorageTeeProof {
Expand All @@ -8,3 +11,18 @@ pub struct StorageTeeProof {
pub updated_at: NaiveDateTime,
pub attestation: Option<Vec<u8>>,
}

#[derive(Debug, Clone, sqlx::FromRow)]
pub struct StorageLockedBatch {
pub l1_batch_number: i64,
pub created_at: NaiveDateTime,
}

impl From<StorageLockedBatch> for LockedBatch {
fn from(tx: StorageLockedBatch) -> LockedBatch {
LockedBatch {
l1_batch_number: L1BatchNumber::from(tx.l1_batch_number as u32),
created_at: DateTime::<Utc>::from_naive_utc_and_offset(tx.created_at, Utc),
}
}
}
64 changes: 44 additions & 20 deletions core/lib/dal/src/tee_proof_generation_dal.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![doc = include_str!("../doc/TeeProofGenerationDal.md")]
use std::time::Duration;

use chrono::{DateTime, Utc};
use strum::{Display, EnumString};
use zksync_db_connection::{
connection::Connection,
Expand All @@ -10,21 +11,42 @@ use zksync_db_connection::{
};
use zksync_types::{tee_types::TeeType, L1BatchNumber};

use crate::{models::storage_tee_proof::StorageTeeProof, Core};
use crate::{
models::storage_tee_proof::{StorageLockedBatch, StorageTeeProof},
Core,
};

#[derive(Debug)]
pub struct TeeProofGenerationDal<'a, 'c> {
pub(crate) storage: &'a mut Connection<'c, Core>,
}

#[derive(Debug, EnumString, Display)]
enum TeeProofGenerationJobStatus {
#[strum(serialize = "unpicked")]
Unpicked,
#[derive(Debug, Clone, Copy, EnumString, Display)]
pub enum TeeProofGenerationJobStatus {
#[strum(serialize = "picked_by_prover")]
PickedByProver,
#[strum(serialize = "generated")]
Generated,
#[strum(serialize = "failed")]
Failed,
#[strum(serialize = "permanently_ignored")]
PermanentlyIgnored,
}

/// Represents a locked batch picked by a TEE prover. A batch is locked when taken by a TEE prover
/// ([TeeProofGenerationJobStatus::PickedByProver]). It can transition to one of three states:
/// 1. [TeeProofGenerationJobStatus::Generated] when the proof is successfully submitted.
/// 2. [TeeProofGenerationJobStatus::Failed] when the proof generation fails, which can happen if
/// its inputs (GCS blob files) are incomplete or the API is unavailable for an extended period.
/// 3. [TeeProofGenerationJobStatus::PermanentlyIgnored] when the proof generation has been
/// continuously failing for an extended period.
#[derive(Clone, Debug)]
pub struct LockedBatch {
/// Locked batch number.
pub l1_batch_number: L1BatchNumber,
/// The creation time of the job for this batch. It is used to determine if the batch should
/// transition to [TeeProofGenerationJobStatus::PermanentlyIgnored] or [TeeProofGenerationJobStatus::Failed].
pub created_at: DateTime<Utc>,
}

impl TeeProofGenerationDal<'_, '_> {
Expand All @@ -33,10 +55,11 @@ impl TeeProofGenerationDal<'_, '_> {
tee_type: TeeType,
processing_timeout: Duration,
min_batch_number: L1BatchNumber,
) -> DalResult<Option<L1BatchNumber>> {
) -> DalResult<Option<LockedBatch>> {
let processing_timeout = pg_interval_from_duration(processing_timeout);
let min_batch_number = i64::from(min_batch_number.0);
sqlx::query!(
let locked_batch = sqlx::query_as!(
StorageLockedBatch,
r#"
WITH upsert AS (
SELECT
Expand All @@ -57,11 +80,8 @@ impl TeeProofGenerationDal<'_, '_> {
AND (
tee.l1_batch_number IS NULL
OR (
tee.status = $3
OR (
tee.status = $2
AND tee.prover_taken_at < NOW() - $4::INTERVAL
)
(tee.status = $2 OR tee.status = $3)
AND tee.prover_taken_at < NOW() - $4::INTERVAL
)
)
FETCH FIRST ROW ONLY
Expand All @@ -87,11 +107,12 @@ impl TeeProofGenerationDal<'_, '_> {
updated_at = NOW(),
prover_taken_at = NOW()
RETURNING
l1_batch_number
l1_batch_number,
created_at
"#,
tee_type.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::Failed.to_string(),
processing_timeout,
min_batch_number
)
Expand All @@ -100,14 +121,17 @@ impl TeeProofGenerationDal<'_, '_> {
.with_arg("processing_timeout", &processing_timeout)
.with_arg("l1_batch_number", &min_batch_number)
.fetch_optional(self.storage)
.await
.map(|record| record.map(|record| L1BatchNumber(record.l1_batch_number as u32)))
.await?
.map(Into::into);

Ok(locked_batch)
}

pub async fn unlock_batch(
&mut self,
l1_batch_number: L1BatchNumber,
tee_type: TeeType,
status: TeeProofGenerationJobStatus,
) -> DalResult<()> {
let batch_number = i64::from(l1_batch_number.0);
sqlx::query!(
Expand All @@ -120,7 +144,7 @@ impl TeeProofGenerationDal<'_, '_> {
l1_batch_number = $2
AND tee_type = $3
"#,
TeeProofGenerationJobStatus::Unpicked.to_string(),
status.to_string(),
batch_number,
tee_type.to_string()
)
Expand Down Expand Up @@ -266,7 +290,7 @@ impl TeeProofGenerationDal<'_, '_> {
"#,
batch_number,
tee_type.to_string(),
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
);
let instrumentation = Instrumented::new("insert_tee_proof_generation_job")
.with_arg("l1_batch_number", &batch_number)
Expand All @@ -281,7 +305,7 @@ impl TeeProofGenerationDal<'_, '_> {
}

/// For testing purposes only.
pub async fn get_oldest_unpicked_batch(&mut self) -> DalResult<Option<L1BatchNumber>> {
pub async fn get_oldest_picked_by_prover_batch(&mut self) -> DalResult<Option<L1BatchNumber>> {
let query = sqlx::query!(
r#"
SELECT
Expand All @@ -295,7 +319,7 @@ impl TeeProofGenerationDal<'_, '_> {
LIMIT
1
"#,
TeeProofGenerationJobStatus::Unpicked.to_string(),
TeeProofGenerationJobStatus::PickedByProver.to_string(),
);
let batch_number = Instrumented::new("get_oldest_unpicked_batch")
.with(query)
Expand Down
13 changes: 12 additions & 1 deletion core/lib/object_store/src/retries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ impl Request<'_> {
self,
store: &impl fmt::Debug,
max_retries: u16,
f: F,
) -> Result<T, ObjectStoreError>
where
Fut: Future<Output = Result<T, ObjectStoreError>>,
F: FnMut() -> Fut,
{
self.retry_internal(max_retries, f).await
}

async fn retry_internal<T, Fut, F>(
&self,
max_retries: u16,
Comment on lines +31 to +42
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JFYI: this is an artifact that I'm gonna revert.

mut f: F,
) -> Result<T, ObjectStoreError>
where
Expand All @@ -53,7 +65,6 @@ impl Request<'_> {
backoff_secs *= 2;
}
Err(err) => {
tracing::warn!(%err, "Failed request with a fatal error");
break Err(err);
}
}
Expand Down
2 changes: 1 addition & 1 deletion core/lib/types/src/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use serde_json::Value;
use serde_with::{hex::Hex, serde_as};
use strum::Display;
use zksync_basic_types::{
tee_types::TeeType,
web3::{AccessList, Bytes, Index},
Bloom, L1BatchNumber, H160, H256, H64, U256, U64,
};
Expand All @@ -16,6 +15,7 @@ pub use crate::transaction_request::{
use crate::{
debug_flat_call::{DebugCallFlat, ResultDebugCallFlat},
protocol_version::L1VerifierConfig,
tee_types::TeeType,
Address, L2BlockNumber, ProtocolVersionId,
};

Expand Down
4 changes: 2 additions & 2 deletions core/node/proof_data_handler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ axum.workspace = true
tokio.workspace = true
tower-http = { workspace = true, features = ["compression-zstd", "decompression-zstd"] }
tracing.workspace = true
chrono.workspace = true

[dev-dependencies]
hyper.workspace = true
chrono.workspace = true
zksync_multivm.workspace = true
serde_json.workspace = true
tower.workspace = true
zksync_basic_types.workspace = true
zksync_contracts.workspace = true
zksync_basic_types.workspace = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: You don't usually need zksync_basic_types as a direct dep if you depend on zksync_types; the latter re-exports a substantial part of basic types.

Loading
Loading