From a3444689139a3d77cba01c18a48fb0b153af8763 Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Sat, 6 Jan 2024 14:16:32 +0100 Subject: [PATCH 1/2] Store engine together with Module to fix memory increase issue --- packages/vm/src/cache.rs | 100 ++++++++++-------- packages/vm/src/modules/cached_module.rs | 23 +++- packages/vm/src/modules/file_system_cache.rs | 59 +++++++---- packages/vm/src/modules/in_memory_cache.rs | 84 +++++++++------ packages/vm/src/modules/mod.rs | 2 +- .../vm/src/modules/pinned_memory_cache.rs | 55 ++++++---- 6 files changed, 199 insertions(+), 124 deletions(-) diff --git a/packages/vm/src/cache.rs b/packages/vm/src/cache.rs index 4616c99653..40ba51c8da 100644 --- a/packages/vm/src/cache.rs +++ b/packages/vm/src/cache.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Mutex; -use wasmer::{Engine, Store}; +use wasmer::{Module, Store}; use cosmwasm_std::Checksum; @@ -19,7 +19,7 @@ use crate::modules::{CachedModule, FileSystemCache, InMemoryCache, PinnedMemoryC use crate::parsed_wasm::ParsedWasm; use crate::size::Size; use crate::static_analysis::{Entrypoint, ExportInfo, REQUIRED_IBC_EXPORTS}; -use crate::wasm_backend::{compile, make_compiling_engine, make_runtime_engine}; +use crate::wasm_backend::{compile, make_compiling_engine}; const STATE_DIR: &str = "state"; // Things related to the state of the blockchain. @@ -91,17 +91,6 @@ pub struct CacheInner { memory_cache: InMemoryCache, fs_cache: FileSystemCache, stats: Stats, - /// A single engine to execute all contracts in this cache instance (usually - /// this means all contracts in the process). - /// - /// This engine is headless, i.e. does not contain a Singlepass compiler. - /// It only executes modules compiled with other engines. - /// - /// The engine has one memory limit set which is the same for all contracts - /// running with it. If different memory limits would be needed for different - /// contracts at some point, we'd need multiple engines. This is because the tunables - /// that control the limit are attached to the engine. - runtime_engine: Engine, } pub struct Cache { @@ -109,6 +98,7 @@ pub struct Cache { /// i.e. any number of read-only references is allowed to access it concurrently. available_capabilities: HashSet, inner: Mutex, + instance_memory_limit: Size, // Those two don't store data but only fix type information type_api: PhantomData, type_storage: PhantomData, @@ -169,8 +159,8 @@ where memory_cache: InMemoryCache::new(memory_cache_size), fs_cache, stats: Stats::default(), - runtime_engine: make_runtime_engine(Some(instance_memory_limit)), }), + instance_memory_limit, type_storage: PhantomData::, type_api: PhantomData::, type_querier: PhantomData::, @@ -318,11 +308,12 @@ where // for a not-so-relevant use case. // Try to get module from file system cache - if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? { + if let Some(cached_module) = cache + .fs_cache + .load(checksum, Some(self.instance_memory_limit))? + { cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1); - return cache - .pinned_memory_cache - .store(checksum, module, module_size); + return cache.pinned_memory_cache.store(checksum, cached_module); } // Re-compile from original Wasm bytecode @@ -337,16 +328,16 @@ where } // This time we'll hit the file-system cache. - let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? + let Some(cached_module) = cache + .fs_cache + .load(checksum, Some(self.instance_memory_limit))? else { return Err(VmError::generic_err( "Can't load module from file system cache after storing it to file system cache (pin)", )); }; - cache - .pinned_memory_cache - .store(checksum, module, module_size) + cache.pinned_memory_cache.store(checksum, cached_module) } /// Unpins a Module, i.e. removes it from the pinned memory cache. @@ -370,10 +361,10 @@ where backend: Backend, options: InstanceOptions, ) -> VmResult> { - let (cached, store) = self.get_module(checksum)?; + let (module, store) = self.get_module(checksum)?; let instance = Instance::from_module( store, - &cached.module, + &module, backend, options.gas_limit, None, @@ -385,36 +376,49 @@ where /// Returns a module tied to a previously saved Wasm. /// Depending on availability, this is either generated from a memory cache, file system cache or Wasm code. /// This is part of `get_instance` but pulled out to reduce the locking time. - fn get_module(&self, checksum: &Checksum) -> VmResult<(CachedModule, Store)> { + fn get_module(&self, checksum: &Checksum) -> VmResult<(Module, Store)> { let mut cache = self.inner.lock().unwrap(); // Try to get module from the pinned memory cache if let Some(element) = cache.pinned_memory_cache.load(checksum)? { cache.stats.hits_pinned_memory_cache = cache.stats.hits_pinned_memory_cache.saturating_add(1); - let store = Store::new(cache.runtime_engine.clone()); - return Ok((element, store)); + let CachedModule { + module, + engine, + size_estimate: _, + } = element; + let store = Store::new(engine); + return Ok((module, store)); } // Get module from memory cache if let Some(element) = cache.memory_cache.load(checksum)? { cache.stats.hits_memory_cache = cache.stats.hits_memory_cache.saturating_add(1); - let store = Store::new(cache.runtime_engine.clone()); - return Ok((element, store)); + let CachedModule { + module, + engine, + size_estimate: _, + } = element; + let store = Store::new(engine); + return Ok((module, store)); } // Get module from file system cache - if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? { + if let Some(cached_module) = cache + .fs_cache + .load(checksum, Some(self.instance_memory_limit))? + { cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1); - cache - .memory_cache - .store(checksum, module.clone(), module_size)?; - let cached = CachedModule { + cache.memory_cache.store(checksum, cached_module.clone())?; + + let CachedModule { module, - size_estimate: module_size, - }; - let store = Store::new(cache.runtime_engine.clone()); - return Ok((cached, store)); + engine, + size_estimate: _, + } = cached_module; + let store = Store::new(engine); + return Ok((module, store)); } // Re-compile module from wasm @@ -433,21 +437,23 @@ where } // This time we'll hit the file-system cache. - let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? + let Some(cached_module) = cache + .fs_cache + .load(checksum, Some(self.instance_memory_limit))? else { return Err(VmError::generic_err( "Can't load module from file system cache after storing it to file system cache (get_module)", )); }; - cache - .memory_cache - .store(checksum, module.clone(), module_size)?; - let cached = CachedModule { + cache.memory_cache.store(checksum, cached_module.clone())?; + + let CachedModule { module, - size_estimate: module_size, - }; - let store = Store::new(cache.runtime_engine.clone()); - Ok((cached, store)) + engine, + size_estimate: _, + } = cached_module; + let store = Store::new(engine); + Ok((module, store)) } } diff --git a/packages/vm/src/modules/cached_module.rs b/packages/vm/src/modules/cached_module.rs index 6f9a157a9b..73c8d1fa58 100644 --- a/packages/vm/src/modules/cached_module.rs +++ b/packages/vm/src/modules/cached_module.rs @@ -1,13 +1,30 @@ -use wasmer::Module; +use wasmer::{Engine, Module}; + +/// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big, +/// so give it a constant 10 KiB estimate. +#[inline] +pub fn engine_size_estimate() -> usize { + 10 * 1024 +} #[derive(Debug, Clone)] pub struct CachedModule { pub module: Module, + /// The runtime engine to run this module. Ideally we could use a single engine + /// for all modules but the memory issue described in + /// requires using one engine per module as a workaround. + pub engine: Engine, /// The estimated size of this element in memory. /// Since the cached modules are just [rkyv](https://rkyv.org/) dumps of the Module /// instances, we use the file size of the module on disk (not the Wasm!) /// as an estimate for this. - /// Note: Since CosmWasm 1.4 (Wasmer 4), Store/Engine are not cached anymore. - /// The majority of the Module size is the Artifact. + /// + /// Between CosmWasm 1.4 (Wasmer 4) and 1.5.2, Store/Engine were not cached. This lead to a + /// memory consumption problem. From 1.5.2 on, Module and Engine are cached and Store is created + /// from Engine on demand. + /// + /// The majority of the Module size is the Artifact which is why we use the module filesize as the estimate. + /// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big, so give it a constant + /// estimate: [`engine_size_estimate`]. pub size_estimate: usize, } diff --git a/packages/vm/src/modules/file_system_cache.rs b/packages/vm/src/modules/file_system_cache.rs index fd87d9cd4e..5338c71818 100644 --- a/packages/vm/src/modules/file_system_cache.rs +++ b/packages/vm/src/modules/file_system_cache.rs @@ -4,13 +4,18 @@ use std::io; use std::path::{Path, PathBuf}; use thiserror::Error; -use wasmer::{AsEngineRef, DeserializeError, Module, Target}; +use wasmer::{DeserializeError, Module, Target}; use cosmwasm_std::Checksum; use crate::errors::{VmError, VmResult}; use crate::filesystem::mkdir_p; use crate::modules::current_wasmer_module_version; +use crate::wasm_backend::make_runtime_engine; +use crate::Size; + +use super::cached_module::engine_size_estimate; +use super::CachedModule; /// Bump this version whenever the module system changes in a way /// that old stored modules would be corrupt when loaded in the new system. @@ -129,24 +134,29 @@ impl FileSystemCache { path } - /// Loads a serialized module from the file system and returns a module (i.e. artifact + store), - /// along with the size of the serialized module. + /// Loads a serialized module from the file system and returns a Module + Engine, + /// along with a size estimation for the pair. pub fn load( &self, checksum: &Checksum, - engine: &impl AsEngineRef, - ) -> VmResult> { + memory_limit: Option, + ) -> VmResult> { let file_path = self.module_file(checksum); + let engine = make_runtime_engine(memory_limit); let result = if self.unchecked_modules { - unsafe { Module::deserialize_from_file_unchecked(engine, &file_path) } + unsafe { Module::deserialize_from_file_unchecked(&engine, &file_path) } } else { - unsafe { Module::deserialize_from_file(engine, &file_path) } + unsafe { Module::deserialize_from_file(&engine, &file_path) } }; match result { Ok(module) => { let module_size = module_size(&file_path)?; - Ok(Some((module, module_size))) + Ok(Some(CachedModule { + module, + engine, + size_estimate: module_size + engine_size_estimate(), + })) } Err(DeserializeError::Io(err)) => match err.kind() { io::ErrorKind::NotFound => Ok(None), @@ -225,7 +235,7 @@ mod tests { use super::*; use crate::{ size::Size, - wasm_backend::{compile, make_compiling_engine, make_runtime_engine}, + wasm_backend::{compile, make_compiling_engine}, }; use tempfile::TempDir; use wasmer::{imports, Instance as WasmerInstance, Store}; @@ -252,8 +262,7 @@ mod tests { let checksum = Checksum::generate(&wasm); // Module does not exist - let runtime_engine = make_runtime_engine(TESTING_MEMORY_LIMIT); - let cached = cache.load(&checksum, &runtime_engine).unwrap(); + let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap(); assert!(cached.is_none()); // Store module @@ -262,14 +271,21 @@ mod tests { cache.store(&checksum, &module).unwrap(); // Load module - let cached = cache.load(&checksum, &runtime_engine).unwrap(); + let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap(); assert!(cached.is_some()); // Check the returned module is functional. // This is not really testing the cache API but better safe than sorry. { - let (cached_module, module_size) = cached.unwrap(); - assert_eq!(module_size, module.serialize().unwrap().len()); + let CachedModule { + module: cached_module, + engine: runtime_engine, + size_estimate, + } = cached.unwrap(); + assert_eq!( + size_estimate, + module.serialize().unwrap().len() + 10240 /* engine size estimate */ + ); let import_object = imports! {}; let mut store = Store::new(runtime_engine); let instance = WasmerInstance::new(&mut store, &cached_module, &import_object).unwrap(); @@ -314,20 +330,25 @@ mod tests { let checksum = Checksum::generate(&wasm); // Store module - let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine1, &wasm).unwrap(); + let compiling_engine = make_compiling_engine(TESTING_MEMORY_LIMIT); + let module = compile(&compiling_engine, &wasm).unwrap(); cache.store(&checksum, &module).unwrap(); // It's there - let engine2 = make_runtime_engine(TESTING_MEMORY_LIMIT); - assert!(cache.load(&checksum, &engine2).unwrap().is_some()); + assert!(cache + .load(&checksum, TESTING_MEMORY_LIMIT) + .unwrap() + .is_some()); // Remove module let existed = cache.remove(&checksum).unwrap(); assert!(existed); // it's gone now - assert!(cache.load(&checksum, &engine2).unwrap().is_none()); + assert!(cache + .load(&checksum, TESTING_MEMORY_LIMIT) + .unwrap() + .is_none()); // Remove again let existed = cache.remove(&checksum).unwrap(); diff --git a/packages/vm/src/modules/in_memory_cache.rs b/packages/vm/src/modules/in_memory_cache.rs index 6708168394..69a74106f3 100644 --- a/packages/vm/src/modules/in_memory_cache.rs +++ b/packages/vm/src/modules/in_memory_cache.rs @@ -1,7 +1,6 @@ use clru::{CLruCache, CLruCacheConfig, WeightScale}; use std::collections::hash_map::RandomState; use std::num::NonZeroUsize; -use wasmer::Module; use cosmwasm_std::Checksum; @@ -51,21 +50,10 @@ impl InMemoryCache { } } - pub fn store( - &mut self, - checksum: &Checksum, - entry: Module, - module_size: usize, - ) -> VmResult<()> { + pub fn store(&mut self, checksum: &Checksum, cached_module: CachedModule) -> VmResult<()> { if let Some(modules) = &mut self.modules { modules - .put_with_weight( - *checksum, - CachedModule { - module: entry, - size_estimate: module_size, - }, - ) + .put_with_weight(*checksum, cached_module) .map_err(|e| VmError::cache_err(format!("{e:?}")))?; } Ok(()) @@ -108,10 +96,10 @@ mod tests { use super::*; use crate::{ size::Size, - wasm_backend::{compile, make_compiling_engine}, + wasm_backend::{compile, make_compiling_engine, make_runtime_engine}, }; use std::mem; - use wasmer::{imports, Instance as WasmerInstance, Store}; + use wasmer::{imports, Instance as WasmerInstance, Module, Store}; use wasmer_middlewares::metering::set_remaining_points; const TESTING_MEMORY_LIMIT: Option = Some(Size::mebi(16)); @@ -181,8 +169,12 @@ mod tests { } // Store module - let size = wasm.len() * TESTING_WASM_SIZE_FACTOR; - cache.store(&checksum, original, size).unwrap(); + let module = CachedModule { + module: original, + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: wasm.len() * TESTING_WASM_SIZE_FACTOR, + }; + cache.store(&checksum, module).unwrap(); // Load module let cached = cache.load(&checksum).unwrap().unwrap(); @@ -214,20 +206,32 @@ mod tests { // Add 1 let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine1, &wasm1).unwrap(); - cache.store(&checksum1, module, 900_000).unwrap(); + let module = CachedModule { + module: compile(&engine1, &wasm1).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 900_000, + }; + cache.store(&checksum1, module).unwrap(); assert_eq!(cache.len(), 1); // Add 2 let engine2 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine2, &wasm2).unwrap(); - cache.store(&checksum2, module, 900_000).unwrap(); + let module = CachedModule { + module: compile(&engine2, &wasm2).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 900_000, + }; + cache.store(&checksum2, module).unwrap(); assert_eq!(cache.len(), 2); // Add 3 (pushes out the previous two) let engine3 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine3, &wasm3).unwrap(); - cache.store(&checksum3, module, 1_500_000).unwrap(); + let module = CachedModule { + module: compile(&engine3, &wasm3).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 1_500_000, + }; + cache.store(&checksum3, module).unwrap(); assert_eq!(cache.len(), 1); } @@ -247,20 +251,32 @@ mod tests { // Add 1 let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine1, &wasm1).unwrap(); - cache.store(&checksum1, module, 900_000).unwrap(); + let module = CachedModule { + module: compile(&engine1, &wasm1).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 900_000, + }; + cache.store(&checksum1, module).unwrap(); assert_eq!(cache.size(), 900_032); // Add 2 let engine2 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine2, &wasm2).unwrap(); - cache.store(&checksum2, module, 800_000).unwrap(); + let module = CachedModule { + module: compile(&engine2, &wasm2).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 800_000, + }; + cache.store(&checksum2, module).unwrap(); assert_eq!(cache.size(), 900_032 + 800_032); // Add 3 (pushes out the previous two) let engine3 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine3, &wasm3).unwrap(); - cache.store(&checksum3, module, 1_500_000).unwrap(); + let module = CachedModule { + module: compile(&engine3, &wasm3).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 1_500_000, + }; + cache.store(&checksum3, module).unwrap(); assert_eq!(cache.size(), 1_500_032); } @@ -287,8 +303,12 @@ mod tests { let original = compile(&engine, &wasm).unwrap(); // Store module - let size = wasm.len() * TESTING_WASM_SIZE_FACTOR; - cache.store(&checksum, original, size).unwrap(); + let module = CachedModule { + module: original, + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: wasm.len() * TESTING_WASM_SIZE_FACTOR, + }; + cache.store(&checksum, module).unwrap(); assert_eq!(cache.len(), 0); assert_eq!(cache.size(), 0); diff --git a/packages/vm/src/modules/mod.rs b/packages/vm/src/modules/mod.rs index c871963e87..c3d2d55500 100644 --- a/packages/vm/src/modules/mod.rs +++ b/packages/vm/src/modules/mod.rs @@ -4,7 +4,7 @@ mod in_memory_cache; mod pinned_memory_cache; mod versioning; -pub use cached_module::CachedModule; +pub use cached_module::{engine_size_estimate, CachedModule}; pub use file_system_cache::{FileSystemCache, NewFileSystemCacheError}; pub use in_memory_cache::InMemoryCache; pub use pinned_memory_cache::PinnedMemoryCache; diff --git a/packages/vm/src/modules/pinned_memory_cache.rs b/packages/vm/src/modules/pinned_memory_cache.rs index b003bed21b..73b8a3b10f 100644 --- a/packages/vm/src/modules/pinned_memory_cache.rs +++ b/packages/vm/src/modules/pinned_memory_cache.rs @@ -1,6 +1,5 @@ use cosmwasm_std::Checksum; use std::collections::HashMap; -use wasmer::Module; use super::cached_module::CachedModule; use crate::VmResult; @@ -18,19 +17,8 @@ impl PinnedMemoryCache { } } - pub fn store( - &mut self, - checksum: &Checksum, - element: Module, - module_size: usize, - ) -> VmResult<()> { - self.modules.insert( - *checksum, - CachedModule { - module: element, - size_estimate: module_size, - }, - ); + pub fn store(&mut self, checksum: &Checksum, cached_module: CachedModule) -> VmResult<()> { + self.modules.insert(*checksum, cached_module); Ok(()) } @@ -75,7 +63,7 @@ impl PinnedMemoryCache { mod tests { use super::*; use crate::{ - wasm_backend::{compile, make_compiling_engine}, + wasm_backend::{compile, make_compiling_engine, make_runtime_engine}, Size, }; use wasmer::{imports, Instance as WasmerInstance, Store}; @@ -120,7 +108,12 @@ mod tests { } // Store module - cache.store(&checksum, original, 0).unwrap(); + let module = CachedModule { + module: original, + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 0, + }; + cache.store(&checksum, module).unwrap(); // Load module let cached = cache.load(&checksum).unwrap().unwrap(); @@ -158,7 +151,12 @@ mod tests { // Add let engine = make_compiling_engine(TESTING_MEMORY_LIMIT); let original = compile(&engine, &wasm).unwrap(); - cache.store(&checksum, original, 0).unwrap(); + let module = CachedModule { + module: original, + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 0, + }; + cache.store(&checksum, module).unwrap(); assert!(cache.has(&checksum)); @@ -190,7 +188,12 @@ mod tests { // Add let engine = make_compiling_engine(TESTING_MEMORY_LIMIT); let original = compile(&engine, &wasm).unwrap(); - cache.store(&checksum, original, 0).unwrap(); + let module = CachedModule { + module: original, + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 0, + }; + cache.store(&checksum, module).unwrap(); assert_eq!(cache.len(), 1); @@ -232,14 +235,22 @@ mod tests { // Add 1 let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine1, &wasm1).unwrap(); - cache.store(&checksum1, module, 500).unwrap(); + let module = CachedModule { + module: compile(&engine1, &wasm1).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 500, + }; + cache.store(&checksum1, module).unwrap(); assert_eq!(cache.size(), 532); // Add 2 let engine2 = make_compiling_engine(TESTING_MEMORY_LIMIT); - let module = compile(&engine2, &wasm2).unwrap(); - cache.store(&checksum2, module, 300).unwrap(); + let module = CachedModule { + module: compile(&engine2, &wasm2).unwrap(), + engine: make_runtime_engine(TESTING_MEMORY_LIMIT), + size_estimate: 300, + }; + cache.store(&checksum2, module).unwrap(); assert_eq!(cache.size(), 532 + 332); // Remove 1 From 1b110c672fe7da6fae7636dbef4ba3e7e15d6fb9 Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Thu, 11 Jan 2024 18:08:00 +0100 Subject: [PATCH 2/2] Add CHANGELOG entry for fixing #1978 --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07c6219325..7795737658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to ## [2.0.0-beta.0] - 2023-12-21 +### Fixed + +- cosmwasm-vm: Fix memory increase issue (1.3 -> 1.4 regression) by avoiding the + use of a long running Wasmer Engine. ([#1978]) + +[#1978]: https://github.com/CosmWasm/cosmwasm/issues/1978 + ### Added - cosmwasm-std: Add `SubMsg:reply_never` constructor ([#1929])