Skip to content

Commit

Permalink
Use TCMalloc to fix system memory leak (#494)
Browse files Browse the repository at this point in the history
* add fix for memory leak

* cmake change to enable tcmalloc

* add hot fix for cmake for boost and tcmalloc

* fix indentation

* indentation

* move the cmake set(... ON) to after cmake_minimum_required

* unset tcmalloc for PYBIND

* unset the environment variable beforehand

* set off

* exclude the compile definition for pybind

* disable for pybind
  • Loading branch information
jinwei14 authored Dec 1, 2023
1 parent 87990da commit 03abc71
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 15 deletions.
22 changes: 14 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# such behavior.
# Contact for this feature: gopalrs.


# Some variables like MSVC are defined only after project(), so put that first.
cmake_minimum_required(VERSION 3.15)
project(diskann)
Expand Down Expand Up @@ -52,6 +53,9 @@ endif()

include_directories(${PROJECT_SOURCE_DIR}/include)

if(NOT PYBIND)
set(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS ON)
endif()
# It's necessary to include tcmalloc headers only if calling into MallocExtension interface.
# For using tcmalloc in DiskANN tools, it's enough to just link with tcmalloc.
if (DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS)
Expand Down Expand Up @@ -92,7 +96,9 @@ if (MSVC)
set(Boost_USE_STATIC_LIBS ON)
endif()

find_package(Boost COMPONENTS program_options)
if(NOT MSVC)
find_package(Boost COMPONENTS program_options)
endif()

# For Windows, fall back to nuget version if find_package didn't find it.
if (MSVC AND NOT Boost_FOUND)
Expand Down Expand Up @@ -219,13 +225,13 @@ if (MSVC)
# Tell CMake how to build the tcmalloc linker library from the submodule.
add_custom_target(build_libtcmalloc_minimal DEPENDS ${TCMALLOC_LINK_LIBRARY})
add_custom_command(OUTPUT ${TCMALLOC_LINK_LIBRARY}
COMMAND ${CMAKE_VS_MSBUILD_COMMAND} gperftools.sln /m /nologo
/t:libtcmalloc_minimal /p:Configuration="Release-Patch"
/property:Platform="x64"
/p:PlatformToolset=v${MSVC_TOOLSET_VERSION}
/p:WindowsTargetPlatformVersion=${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/gperftools)

COMMAND ${CMAKE_VS_MSBUILD_COMMAND} gperftools.sln /m /nologo
/t:libtcmalloc_minimal /p:Configuration="Release-Patch"
/property:Platform="x64"
/p:PlatformToolset=v${MSVC_TOOLSET_VERSION}
/p:WindowsTargetPlatformVersion=${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/gperftools)
add_library(libtcmalloc_minimal_for_exe STATIC IMPORTED)
add_library(libtcmalloc_minimal_for_dll STATIC IMPORTED)

Expand Down
10 changes: 7 additions & 3 deletions src/disk_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "common_includes.h"

#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#include "gperftools/malloc_extension.h"
#endif

Expand Down Expand Up @@ -675,14 +675,18 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
Timer timer;
int num_parts =
partition_with_ram_budget<T>(base_file, sampling_rate, ram_budget, 2 * R / 3, merged_index_prefix, 2);
diskann::cout << timer.elapsed_seconds_for_step("partitioning data") << std::endl;
diskann::cout << timer.elapsed_seconds_for_step("partitioning data ") << std::endl;

std::string cur_centroid_filepath = merged_index_prefix + "_centroids.bin";
std::rename(cur_centroid_filepath.c_str(), centroids_file.c_str());

timer.reset();
for (int p = 0; p < num_parts; p++)
{
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
MallocExtension::instance()->ReleaseFreeMemory();

This comment has been minimized.

Copy link
@jinwei14

jinwei14 Sep 12, 2024

Author Contributor

this line is called at the beginning of each shard build

#endif

std::string shard_base_file = merged_index_prefix + "_subshard-" + std::to_string(p) + ".bin";

std::string shard_ids_file = merged_index_prefix + "_subshard-" + std::to_string(p) + "_ids_uint32.bin";
Expand Down Expand Up @@ -1283,7 +1287,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const

// Gopal. Splitting diskann_dll into separate DLLs for search and build.
// This code should only be available in the "build" DLL.
#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
MallocExtension::instance()->ReleaseFreeMemory();
#endif

Expand Down
3 changes: 3 additions & 0 deletions src/dll/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ set(TARGET_DIR "$<$<CONFIG:Debug>:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$<CO

set(DISKANN_DLL_IMPLIB "${TARGET_DIR}/${PROJECT_NAME}.lib")

if (NOT PYBIND)
target_compile_definitions(${PROJECT_NAME} PRIVATE DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS DISKANN_BUILD)
endif()
target_compile_definitions(${PROJECT_NAME} PRIVATE _USRDLL _WINDLL)
target_compile_options(${PROJECT_NAME} PRIVATE /GL)
target_include_directories(${PROJECT_NAME} PRIVATE ${DISKANN_MKL_INCLUDE_DIRECTORIES})
Expand Down
2 changes: 1 addition & 1 deletion src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "tsl/robin_map.h"
#include "tsl/robin_set.h"
#include "windows_customizations.h"
#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#include "gperftools/malloc_extension.h"
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "tsl/robin_map.h"
#include "tsl/robin_set.h"

#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#include "gperftools/malloc_extension.h"
#endif

Expand Down
6 changes: 4 additions & 2 deletions src/pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// Licensed under the MIT license.

#include "mkl.h"

#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#include "gperftools/malloc_extension.h"
#endif
#include "pq.h"
#include "partition.h"
#include "math_utils.h"
Expand Down Expand Up @@ -923,7 +925,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent
}
// Gopal. Splitting diskann_dll into separate DLLs for search and build.
// This code should only be available in the "build" DLL.
#if defined(RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
#if defined(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS) && defined(DISKANN_BUILD)
MallocExtension::instance()->ReleaseFreeMemory();
#endif
compressed_file_writer.close();
Expand Down

0 comments on commit 03abc71

Please sign in to comment.