-
Notifications
You must be signed in to change notification settings - Fork 90
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(raiko): bonsai auto scaling (#341)
* use bonsai auto scaling api
  Signed-off-by: smtmfft <[email protected]>
* update auto-scaling poc
* refine auto scaler set/get logic
* remove dup code
* add missing error return
* Update provers/risc0/driver/src/lib.rs
  Co-authored-by: Petar Vujović <[email protected]>
* Update provers/risc0/driver/src/bonsai/auto_scaling.rs
  Co-authored-by: Petar Vujović <[email protected]>
* refine auto scaling
* remove useless comments
  Signed-off-by: smtmfft <[email protected]>
---------
Signed-off-by: smtmfft <[email protected]>
Co-authored-by: Petar Vujović <[email protected]>
- Loading branch information
1 parent
cce1371
commit dc89e60
Showing
11 changed files
with
244 additions
and
18 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
use anyhow::{Error, Ok, Result}; | ||
use lazy_static::lazy_static; | ||
use reqwest::{header::HeaderMap, header::HeaderValue, header::CONTENT_TYPE, Client}; | ||
use serde::Deserialize; | ||
use std::env; | ||
use tracing::{debug, error as trace_err}; | ||
|
||
#[derive(Debug, Deserialize, Default)] | ||
struct ScalerResponse { | ||
desired: u32, | ||
current: u32, | ||
pending: u32, | ||
} | ||
struct BonsaiAutoScaler { | ||
url: String, | ||
headers: HeaderMap, | ||
client: Client, | ||
on_setting_status: Option<ScalerResponse>, | ||
} | ||
|
||
impl BonsaiAutoScaler { | ||
fn new(bonsai_api_url: String, api_key: String) -> Self { | ||
let url = bonsai_api_url + "/workers"; | ||
let mut headers = HeaderMap::new(); | ||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); | ||
headers.insert("x-api-key", HeaderValue::from_str(&api_key).unwrap()); | ||
|
||
Self { | ||
url, | ||
headers, | ||
client: Client::new(), | ||
on_setting_status: None, | ||
} | ||
} | ||
|
||
async fn get_bonsai_gpu_num(&self) -> Result<ScalerResponse> { | ||
debug!("Requesting scaler status from: {}", self.url); | ||
let response = self | ||
.client | ||
.get(self.url.clone()) | ||
.headers(self.headers.clone()) | ||
.send() | ||
.await?; | ||
|
||
// Check if the request was successful | ||
if response.status().is_success() { | ||
// Parse the JSON response | ||
let data: ScalerResponse = response.json().await.unwrap_or_default(); | ||
debug!("Scaler status: {data:?}"); | ||
Ok(data) | ||
} else { | ||
trace_err!("Request failed with status: {}", response.status()); | ||
Err(Error::msg("Failed to get bonsai gpu num".to_string())) | ||
} | ||
} | ||
|
||
async fn set_bonsai_gpu_num(&mut self, gpu_num: u32) -> Result<()> { | ||
if self.on_setting_status.is_some() { | ||
// log an err if there is a race adjustment. | ||
trace_err!("Last bonsai setting is not active, please check."); | ||
} | ||
|
||
debug!("Requesting scaler status from: {}", self.url); | ||
let response = self | ||
.client | ||
.post(self.url.clone()) | ||
.headers(self.headers.clone()) | ||
.body(gpu_num.to_string()) | ||
.send() | ||
.await?; | ||
|
||
// Check if the request was successful | ||
if response.status().is_success() { | ||
self.on_setting_status = Some(ScalerResponse { | ||
desired: gpu_num, | ||
current: 0, | ||
pending: 0, | ||
}); | ||
Ok(()) | ||
} else { | ||
trace_err!("Request failed with status: {}", response.status()); | ||
Err(Error::msg("Failed to get bonsai gpu num".to_string())) | ||
} | ||
} | ||
|
||
async fn wait_for_bonsai_config_active(&mut self, time_out_sec: u64) -> Result<()> { | ||
match &self.on_setting_status { | ||
None => Ok(()), | ||
Some(setting) => { | ||
// loop until some timeout | ||
let start_time = std::time::Instant::now(); | ||
let mut check_time = std::time::Instant::now(); | ||
while check_time.duration_since(start_time).as_secs() < time_out_sec { | ||
tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; | ||
check_time = std::time::Instant::now(); | ||
let current_bonsai_gpu_num = self.get_bonsai_gpu_num().await?; | ||
if current_bonsai_gpu_num.current == setting.desired { | ||
self.on_setting_status = None; | ||
return Ok(()); | ||
} | ||
} | ||
Err(Error::msg( | ||
"checking bonsai config active timeout".to_string(), | ||
)) | ||
} | ||
} | ||
} | ||
} | ||
|
||
lazy_static! { | ||
static ref BONSAI_API_URL: String = | ||
env::var("BONSAI_API_URL").expect("BONSAI_API_URL must be set"); | ||
static ref BONSAI_API_KEY: String = | ||
env::var("BONSAI_API_KEY").expect("BONSAI_API_KEY must be set"); | ||
static ref MAX_BONSAI_GPU_NUM: u32 = env::var("MAX_BONSAI_GPU_NUM") | ||
.unwrap_or_else(|_| "15".to_string()) | ||
.parse() | ||
.unwrap(); | ||
} | ||
|
||
pub(crate) async fn maxpower_bonsai() -> Result<()> { | ||
let mut auto_scaler = | ||
BonsaiAutoScaler::new(BONSAI_API_URL.to_string(), BONSAI_API_KEY.to_string()); | ||
let current_gpu_num = auto_scaler.get_bonsai_gpu_num().await?; | ||
// either already maxed out or pending to be maxed out | ||
if current_gpu_num.current == *MAX_BONSAI_GPU_NUM | ||
&& current_gpu_num.desired == *MAX_BONSAI_GPU_NUM | ||
&& current_gpu_num.pending == 0 | ||
{ | ||
Ok(()) | ||
} else { | ||
auto_scaler.set_bonsai_gpu_num(*MAX_BONSAI_GPU_NUM).await?; | ||
auto_scaler.wait_for_bonsai_config_active(300).await | ||
} | ||
} | ||
|
||
pub(crate) async fn shutdown_bonsai() -> Result<()> { | ||
let mut auto_scaler = | ||
BonsaiAutoScaler::new(BONSAI_API_URL.to_string(), BONSAI_API_KEY.to_string()); | ||
let current_gpu_num = auto_scaler.get_bonsai_gpu_num().await?; | ||
if current_gpu_num.current == 0 && current_gpu_num.pending == 0 && current_gpu_num.desired == 0 | ||
{ | ||
Ok(()) | ||
} else { | ||
auto_scaler.set_bonsai_gpu_num(0).await?; | ||
// wait few minute for the bonsai to cool down | ||
auto_scaler.wait_for_bonsai_config_active(30).await | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use super::*;
    use std::env;
    use tokio;

    /// Reads the scaler status once and checks that the reported counts are consistent.
    ///
    /// Ignored by default: it needs `BONSAI_API_URL` and `BONSAI_API_KEY` and issues a
    /// real HTTP request.
    #[ignore]
    #[tokio::test]
    async fn test_bonsai_auto_scaler_get() {
        let url = env::var("BONSAI_API_URL").expect("BONSAI_API_URL must be set");
        let key = env::var("BONSAI_API_KEY").expect("BONSAI_API_KEY must be set");
        let gpu_limit: u32 = env::var("MAX_BONSAI_GPU_NUM")
            .unwrap_or_else(|_| "15".to_string())
            .parse()
            .unwrap();

        let scaler = BonsaiAutoScaler::new(url, key);
        let status = scaler.get_bonsai_gpu_num().await.unwrap();

        assert!(status.current <= gpu_limit);
        assert_eq!(status.desired, status.current + status.pending);
    }

    /// Scales to 7 workers and then back to 0, checking after each step that the scaler
    /// reports the requested count as `current`.
    ///
    /// Ignored by default: it needs `BONSAI_API_URL` and `BONSAI_API_KEY` and issues
    /// real HTTP requests.
    #[ignore]
    #[tokio::test]
    async fn test_bonsai_auto_scaler_set() {
        let url = env::var("BONSAI_API_URL").expect("BONSAI_API_URL must be set");
        let key = env::var("BONSAI_API_KEY").expect("BONSAI_API_KEY must be set");
        let mut scaler = BonsaiAutoScaler::new(url, key);

        for target in [7u32, 0] {
            scaler
                .set_bonsai_gpu_num(target)
                .await
                .expect("Failed to set bonsai gpu num");
            scaler.wait_for_bonsai_config_active(300).await.unwrap();

            let reported = scaler.get_bonsai_gpu_num().await.unwrap().current;
            assert_eq!(reported, target);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
pub const ECDSA_ELF: &[u8] = | ||
include_bytes!("../../../guest/target/riscv32im-risc0-zkvm-elf/release/ecdsa"); | ||
pub const ECDSA_ID: [u32; 8] = [ | ||
3314277365, 903638368, 2823387338, 975292771, 2962241176, 3386670094, 1262198564, 423457744, | ||
1166688769, 1407190737, 3347938864, 1261472884, 3997842354, 3752365982, 4108615966, 2506107654, | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
pub const SHA256_ELF: &[u8] = | ||
include_bytes!("../../../guest/target/riscv32im-risc0-zkvm-elf/release/sha256"); | ||
pub const SHA256_ID: [u32; 8] = [ | ||
3506084161, 1146489446, 485833862, 3404354046, 3626029993, 1928006034, 3833244069, 3073098029, | ||
1030743442, 3697463329, 2083175350, 1726292372, 629109085, 444583534, 849554126, 3148184953, | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
pub const TEST_RISC0_GUEST_ELF: &[u8] = include_bytes!( | ||
"../../../guest/target/riscv32im-risc0-zkvm-elf/release/deps/risc0_guest-4b4f18d42a260659" | ||
"../../../guest/target/riscv32im-risc0-zkvm-elf/release/deps/risc0_guest-3bef88267f07d7e2" | ||
); | ||
pub const TEST_RISC0_GUEST_ID: [u32; 8] = [ | ||
3216516244, 2583889163, 799150854, 107525368, 1015178806, 1451965571, 3377528142, 1073775, | ||
947177299, 3433149683, 3077752115, 1716500464, 3011459317, 622725533, 247263939, 1661915565, | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters