Merge pull request #1982 from CosmWasm/one-engine-per-module

Store engine together with Module to mitigate memory increase issue
CosmWasm · Jan 11, 2024 · f6c9c46 · f6c9c46
2 parents 34914b1 + 1b110c6
commit f6c9c46
Show file tree

Hide file tree

Showing 7 changed files with 206 additions and 124 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,13 @@ and this project adheres to
 
 ## [2.0.0-beta.0] - 2023-12-21
 
+### Fixed
+
+- cosmwasm-vm: Fix memory increase issue (1.3 -> 1.4 regression) by avoiding the
+  use of a long running Wasmer Engine. ([#1978])
+
+[#1978]: https://github.com/CosmWasm/cosmwasm/issues/1978
+
 ### Added
 
 - cosmwasm-std: Add `SubMsg:reply_never` constructor ([#1929])

diff --git a/packages/vm/src/cache.rs b/packages/vm/src/cache.rs
@@ -5,7 +5,7 @@ use std::marker::PhantomData;
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::sync::Mutex;
-use wasmer::{Engine, Store};
+use wasmer::{Module, Store};
 
 use cosmwasm_std::Checksum;
 
@@ -19,7 +19,7 @@ use crate::modules::{CachedModule, FileSystemCache, InMemoryCache, PinnedMemoryC
 use crate::parsed_wasm::ParsedWasm;
 use crate::size::Size;
 use crate::static_analysis::{Entrypoint, ExportInfo, REQUIRED_IBC_EXPORTS};
-use crate::wasm_backend::{compile, make_compiling_engine, make_runtime_engine};
+use crate::wasm_backend::{compile, make_compiling_engine};
 
 const STATE_DIR: &str = "state";
 // Things related to the state of the blockchain.
@@ -91,24 +91,14 @@ pub struct CacheInner {
     memory_cache: InMemoryCache,
     fs_cache: FileSystemCache,
     stats: Stats,
-    /// A single engine to execute all contracts in this cache instance (usually
-    /// this means all contracts in the process).
-    ///
-    /// This engine is headless, i.e. does not contain a Singlepass compiler.
-    /// It only executes modules compiled with other engines.
-    ///
-    /// The engine has one memory limit set which is the same for all contracts
-    /// running with it. If different memory limits would be needed for different
-    /// contracts at some point, we'd need multiple engines. This is because the tunables
-    /// that control the limit are attached to the engine.
-    runtime_engine: Engine,
 }
 
 pub struct Cache<A: BackendApi, S: Storage, Q: Querier> {
     /// Available capabilities are immutable for the lifetime of the cache,
     /// i.e. any number of read-only references is allowed to access it concurrently.
     available_capabilities: HashSet<String>,
     inner: Mutex<CacheInner>,
+    instance_memory_limit: Size,
     // Those two don't store data but only fix type information
     type_api: PhantomData<A>,
     type_storage: PhantomData<S>,
@@ -169,8 +159,8 @@ where
                 memory_cache: InMemoryCache::new(memory_cache_size),
                 fs_cache,
                 stats: Stats::default(),
-                runtime_engine: make_runtime_engine(Some(instance_memory_limit)),
             }),
+            instance_memory_limit,
             type_storage: PhantomData::<S>,
             type_api: PhantomData::<A>,
             type_querier: PhantomData::<Q>,
@@ -318,11 +308,12 @@ where
         // for a not-so-relevant use case.
 
         // Try to get module from file system cache
-        if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? {
+        if let Some(cached_module) = cache
+            .fs_cache
+            .load(checksum, Some(self.instance_memory_limit))?
+        {
             cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1);
-            return cache
-                .pinned_memory_cache
-                .store(checksum, module, module_size);
+            return cache.pinned_memory_cache.store(checksum, cached_module);
         }
 
         // Re-compile from original Wasm bytecode
@@ -337,16 +328,16 @@ where
         }
 
         // This time we'll hit the file-system cache.
-        let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)?
+        let Some(cached_module) = cache
+            .fs_cache
+            .load(checksum, Some(self.instance_memory_limit))?
         else {
             return Err(VmError::generic_err(
                 "Can't load module from file system cache after storing it to file system cache (pin)",
             ));
         };
 
-        cache
-            .pinned_memory_cache
-            .store(checksum, module, module_size)
+        cache.pinned_memory_cache.store(checksum, cached_module)
     }
 
     /// Unpins a Module, i.e. removes it from the pinned memory cache.
@@ -370,10 +361,10 @@ where
         backend: Backend<A, S, Q>,
         options: InstanceOptions,
     ) -> VmResult<Instance<A, S, Q>> {
-        let (cached, store) = self.get_module(checksum)?;
+        let (module, store) = self.get_module(checksum)?;
         let instance = Instance::from_module(
             store,
-            &cached.module,
+            &module,
             backend,
             options.gas_limit,
             None,
@@ -385,36 +376,49 @@ where
     /// Returns a module tied to a previously saved Wasm.
     /// Depending on availability, this is either generated from a memory cache, file system cache or Wasm code.
     /// This is part of `get_instance` but pulled out to reduce the locking time.
-    fn get_module(&self, checksum: &Checksum) -> VmResult<(CachedModule, Store)> {
+    fn get_module(&self, checksum: &Checksum) -> VmResult<(Module, Store)> {
         let mut cache = self.inner.lock().unwrap();
         // Try to get module from the pinned memory cache
         if let Some(element) = cache.pinned_memory_cache.load(checksum)? {
             cache.stats.hits_pinned_memory_cache =
                 cache.stats.hits_pinned_memory_cache.saturating_add(1);
-            let store = Store::new(cache.runtime_engine.clone());
-            return Ok((element, store));
+            let CachedModule {
+                module,
+                engine,
+                size_estimate: _,
+            } = element;
+            let store = Store::new(engine);
+            return Ok((module, store));
         }
 
         // Get module from memory cache
         if let Some(element) = cache.memory_cache.load(checksum)? {
             cache.stats.hits_memory_cache = cache.stats.hits_memory_cache.saturating_add(1);
-            let store = Store::new(cache.runtime_engine.clone());
-            return Ok((element, store));
+            let CachedModule {
+                module,
+                engine,
+                size_estimate: _,
+            } = element;
+            let store = Store::new(engine);
+            return Ok((module, store));
         }
 
         // Get module from file system cache
-        if let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)? {
+        if let Some(cached_module) = cache
+            .fs_cache
+            .load(checksum, Some(self.instance_memory_limit))?
+        {
             cache.stats.hits_fs_cache = cache.stats.hits_fs_cache.saturating_add(1);
 
-            cache
-                .memory_cache
-                .store(checksum, module.clone(), module_size)?;
-            let cached = CachedModule {
+            cache.memory_cache.store(checksum, cached_module.clone())?;
+
+            let CachedModule {
                 module,
-                size_estimate: module_size,
-            };
-            let store = Store::new(cache.runtime_engine.clone());
-            return Ok((cached, store));
+                engine,
+                size_estimate: _,
+            } = cached_module;
+            let store = Store::new(engine);
+            return Ok((module, store));
         }
 
         // Re-compile module from wasm
@@ -433,21 +437,23 @@ where
         }
 
         // This time we'll hit the file-system cache.
-        let Some((module, module_size)) = cache.fs_cache.load(checksum, &cache.runtime_engine)?
+        let Some(cached_module) = cache
+            .fs_cache
+            .load(checksum, Some(self.instance_memory_limit))?
         else {
             return Err(VmError::generic_err(
                 "Can't load module from file system cache after storing it to file system cache (get_module)",
             ));
         };
-        cache
-            .memory_cache
-            .store(checksum, module.clone(), module_size)?;
-        let cached = CachedModule {
+        cache.memory_cache.store(checksum, cached_module.clone())?;
+
+        let CachedModule {
             module,
-            size_estimate: module_size,
-        };
-        let store = Store::new(cache.runtime_engine.clone());
-        Ok((cached, store))
+            engine,
+            size_estimate: _,
+        } = cached_module;
+        let store = Store::new(engine);
+        Ok((module, store))
     }
 }
 

diff --git a/packages/vm/src/modules/cached_module.rs b/packages/vm/src/modules/cached_module.rs
@@ -1,13 +1,30 @@
-use wasmer::Module;
+use wasmer::{Engine, Module};
+
+/// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big,
+/// so give it a constant 10 KiB estimate.
+#[inline]
+pub fn engine_size_estimate() -> usize {
+    10 * 1024
+}
 
 #[derive(Debug, Clone)]
 pub struct CachedModule {
     pub module: Module,
+    /// The runtime engine to run this module. Ideally we could use a single engine
+    /// for all modules but the memory issue described in <https://github.com/wasmerio/wasmer/issues/4377>
+    /// requires using one engine per module as a workaround.
+    pub engine: Engine,
     /// The estimated size of this element in memory.
     /// Since the cached modules are just [rkyv](https://rkyv.org/) dumps of the Module
     /// instances, we use the file size of the module on disk (not the Wasm!)
     /// as an estimate for this.
-    /// Note: Since CosmWasm 1.4 (Wasmer 4), Store/Engine are not cached anymore.
-    /// The majority of the Module size is the Artifact.
+    ///
+    /// Between CosmWasm 1.4 (Wasmer 4) and 1.5.2, Store/Engine were not cached. This lead to a
+    /// memory consumption problem. From 1.5.2 on, Module and Engine are cached and Store is created
+    /// from Engine on demand.
+    ///
+    /// The majority of the Module size is the Artifact which is why we use the module filesize as the estimate.
+    /// Some manual tests on Simon's machine showed that Engine is roughly 3-5 KB big, so give it a constant
+    /// estimate: [`engine_size_estimate`].
     pub size_estimate: usize,
 }
diff --git a/packages/vm/src/modules/file_system_cache.rs b/packages/vm/src/modules/file_system_cache.rs
@@ -4,13 +4,18 @@ use std::io;
 use std::path::{Path, PathBuf};
 use thiserror::Error;
 
-use wasmer::{AsEngineRef, DeserializeError, Module, Target};
+use wasmer::{DeserializeError, Module, Target};
 
 use cosmwasm_std::Checksum;
 
 use crate::errors::{VmError, VmResult};
 use crate::filesystem::mkdir_p;
 use crate::modules::current_wasmer_module_version;
+use crate::wasm_backend::make_runtime_engine;
+use crate::Size;
+
+use super::cached_module::engine_size_estimate;
+use super::CachedModule;
 
 /// Bump this version whenever the module system changes in a way
 /// that old stored modules would be corrupt when loaded in the new system.
@@ -129,24 +134,29 @@ impl FileSystemCache {
         path
     }
 
-    /// Loads a serialized module from the file system and returns a module (i.e. artifact + store),
-    /// along with the size of the serialized module.
+    /// Loads a serialized module from the file system and returns a Module + Engine,
+    /// along with a size estimation for the pair.
     pub fn load(
         &self,
         checksum: &Checksum,
-        engine: &impl AsEngineRef,
-    ) -> VmResult<Option<(Module, usize)>> {
+        memory_limit: Option<Size>,
+    ) -> VmResult<Option<CachedModule>> {
         let file_path = self.module_file(checksum);
 
+        let engine = make_runtime_engine(memory_limit);
         let result = if self.unchecked_modules {
-            unsafe { Module::deserialize_from_file_unchecked(engine, &file_path) }
+            unsafe { Module::deserialize_from_file_unchecked(&engine, &file_path) }
         } else {
-            unsafe { Module::deserialize_from_file(engine, &file_path) }
+            unsafe { Module::deserialize_from_file(&engine, &file_path) }
         };
         match result {
             Ok(module) => {
                 let module_size = module_size(&file_path)?;
-                Ok(Some((module, module_size)))
+                Ok(Some(CachedModule {
+                    module,
+                    engine,
+                    size_estimate: module_size + engine_size_estimate(),
+                }))
             }
             Err(DeserializeError::Io(err)) => match err.kind() {
                 io::ErrorKind::NotFound => Ok(None),
@@ -225,7 +235,7 @@ mod tests {
     use super::*;
     use crate::{
         size::Size,
-        wasm_backend::{compile, make_compiling_engine, make_runtime_engine},
+        wasm_backend::{compile, make_compiling_engine},
     };
     use tempfile::TempDir;
     use wasmer::{imports, Instance as WasmerInstance, Store};
@@ -252,8 +262,7 @@ mod tests {
         let checksum = Checksum::generate(&wasm);
 
         // Module does not exist
-        let runtime_engine = make_runtime_engine(TESTING_MEMORY_LIMIT);
-        let cached = cache.load(&checksum, &runtime_engine).unwrap();
+        let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
         assert!(cached.is_none());
 
         // Store module
@@ -262,14 +271,21 @@ mod tests {
         cache.store(&checksum, &module).unwrap();
 
         // Load module
-        let cached = cache.load(&checksum, &runtime_engine).unwrap();
+        let cached = cache.load(&checksum, TESTING_MEMORY_LIMIT).unwrap();
         assert!(cached.is_some());
 
         // Check the returned module is functional.
         // This is not really testing the cache API but better safe than sorry.
         {
-            let (cached_module, module_size) = cached.unwrap();
-            assert_eq!(module_size, module.serialize().unwrap().len());
+            let CachedModule {
+                module: cached_module,
+                engine: runtime_engine,
+                size_estimate,
+            } = cached.unwrap();
+            assert_eq!(
+                size_estimate,
+                module.serialize().unwrap().len() + 10240 /* engine size estimate */
+            );
             let import_object = imports! {};
             let mut store = Store::new(runtime_engine);
             let instance = WasmerInstance::new(&mut store, &cached_module, &import_object).unwrap();
@@ -314,20 +330,25 @@ mod tests {
         let checksum = Checksum::generate(&wasm);
 
         // Store module
-        let engine1 = make_compiling_engine(TESTING_MEMORY_LIMIT);
-        let module = compile(&engine1, &wasm).unwrap();
+        let compiling_engine = make_compiling_engine(TESTING_MEMORY_LIMIT);
+        let module = compile(&compiling_engine, &wasm).unwrap();
         cache.store(&checksum, &module).unwrap();
 
         // It's there
-        let engine2 = make_runtime_engine(TESTING_MEMORY_LIMIT);
-        assert!(cache.load(&checksum, &engine2).unwrap().is_some());
+        assert!(cache
+            .load(&checksum, TESTING_MEMORY_LIMIT)
+            .unwrap()
+            .is_some());
 
         // Remove module
         let existed = cache.remove(&checksum).unwrap();
         assert!(existed);
 
         // it's gone now
-        assert!(cache.load(&checksum, &engine2).unwrap().is_none());
+        assert!(cache
+            .load(&checksum, TESTING_MEMORY_LIMIT)
+            .unwrap()
+            .is_none());
 
         // Remove again
         let existed = cache.remove(&checksum).unwrap();