From 03abc71205bbc83f16a353a21426b5e8c0684006 Mon Sep 17 00:00:00 2001 From: jinwei14 Date: Fri, 1 Dec 2023 01:18:45 -0800 Subject: [PATCH] Use TCMalloc to fix system memory leak (#494) * add fix for memory leak * cmake change for enable tcmalloc * add hot fix for cmake for boost and tcmalloc * fix indentation * identitation * change camke set on after cmake_minimum_required * unset tcmalloc for PYBIND * unset envirvariable beforehead * set off * exlucde the compile def for pybind * disable for pybind --- CMakeLists.txt | 22 ++++++++++++++-------- src/disk_utils.cpp | 10 +++++++--- src/dll/CMakeLists.txt | 3 +++ src/index.cpp | 2 +- src/partition.cpp | 2 +- src/pq.cpp | 6 ++++-- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 89530f818..3d3d2b860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ # such behavior. # Contact for this feature: gopalrs. + # Some variables like MSVC are defined only after project(), so put that first. cmake_minimum_required(VERSION 3.15) project(diskann) @@ -52,6 +53,9 @@ endif() include_directories(${PROJECT_SOURCE_DIR}/include) +if(NOT PYBIND) + set(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS ON) +endif() # It's necessary to include tcmalloc headers only if calling into MallocExtension interface. # For using tcmalloc in DiskANN tools, it's enough to just link with tcmalloc. if (DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) @@ -92,7 +96,9 @@ if (MSVC) set(Boost_USE_STATIC_LIBS ON) endif() -find_package(Boost COMPONENTS program_options) +if(NOT MSVC) + find_package(Boost COMPONENTS program_options) +endif() # For Windows, fall back to nuget version if find_package didn't find it. if (MSVC AND NOT Boost_FOUND) @@ -219,13 +225,13 @@ if (MSVC) # Tell CMake how to build the tcmalloc linker library from the submodule. add_custom_target(build_libtcmalloc_minimal DEPENDS ${TCMALLOC_LINK_LIBRARY}) add_custom_command(OUTPUT ${TCMALLOC_LINK_LIBRARY} - COMMAND ${CMAKE_VS_MSBUILD_COMMAND} gperftools.sln /m /nologo - /t:libtcmalloc_minimal /p:Configuration="Release-Patch" - /property:Platform="x64" - /p:PlatformToolset=v${MSVC_TOOLSET_VERSION} - /p:WindowsTargetPlatformVersion=${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION} - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/gperftools) - + COMMAND ${CMAKE_VS_MSBUILD_COMMAND} gperftools.sln /m /nologo + /t:libtcmalloc_minimal /p:Configuration="Release-Patch" + /property:Platform="x64" + /p:PlatformToolset=v${MSVC_TOOLSET_VERSION} + /p:WindowsTargetPlatformVersion=${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/gperftools) + add_library(libtcmalloc_minimal_for_exe STATIC IMPORTED) add_library(libtcmalloc_minimal_for_dll STATIC IMPORTED) diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 297619b4a..b29d253a1 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -3,7 +3,7 @@ #include "common_includes.h" -#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) #include "gperftools/malloc_extension.h" #endif @@ -675,7 +675,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr Timer timer; int num_parts = partition_with_ram_budget(base_file, sampling_rate, ram_budget, 2 * R / 3, merged_index_prefix, 2); - diskann::cout << timer.elapsed_seconds_for_step("partitioning data") << std::endl; + diskann::cout << timer.elapsed_seconds_for_step("partitioning data ") << std::endl; std::string cur_centroid_filepath = merged_index_prefix + "_centroids.bin"; std::rename(cur_centroid_filepath.c_str(), centroids_file.c_str()); @@ -683,6 +683,10 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr timer.reset(); for (int p = 0; p < num_parts; p++) { +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) + MallocExtension::instance()->ReleaseFreeMemory(); +#endif + std::string shard_base_file = merged_index_prefix + "_subshard-" + std::to_string(p) + ".bin"; std::string shard_ids_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_ids_uint32.bin"; @@ -1283,7 +1287,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const // Gopal. Splitting diskann_dll into separate DLLs for search and build. // This code should only be available in the "build" DLL. -#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) +#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD) MallocExtension::instance()->ReleaseFreeMemory(); #endif diff --git a/src/dll/CMakeLists.txt b/src/dll/CMakeLists.txt index d00cfeb95..b4726668f 100644 --- a/src/dll/CMakeLists.txt +++ b/src/dll/CMakeLists.txt @@ -10,6 +10,9 @@ set(TARGET_DIR "$<$:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$ReleaseFreeMemory(); #endif compressed_file_writer.close();