Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the cache configurable by cpp API #950

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions include/zim/archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,70 @@ namespace zim
*/
std::shared_ptr<FileImpl> getImpl() const { return m_impl; }

/** Get the maximum size of the cluster cache.
*
* The limit can be changed with `set_cluster_cache_max_size`.
*
* @return The maximum number of clusters stored in the cache.
*/
size_t get_cluster_cache_max_size() const;

/** Get the current size of the cluster cache.
*
* See `get_cluster_cache_max_size` for the configured upper limit.
*
* @return The number of clusters currently stored in the cache.
*/
size_t get_cluster_cache_current_size() const;

/** Set the maximum size of the cluster cache.
*
* If the new size is lower than the number of currently stored clusters,
* some clusters will be dropped from the cache to respect the new size.
*
* @param nb_clusters The maximum number of clusters stored in the cache.
*/
void set_cluster_cache_max_size(size_t nb_clusters);

/** Get the maximum size of the dirent cache.
*
* The limit can be changed with `set_dirent_cache_max_size`.
*
* @return The maximum number of dirents stored in the cache.
*/
size_t get_dirent_cache_max_size() const;

/** Get the current size of the dirent cache.
*
* See `get_dirent_cache_max_size` for the configured upper limit.
*
* @return The number of dirents currently stored in the cache.
*/
size_t get_dirent_cache_current_size() const;

/** Set the maximum size of the dirent cache.
*
* If the new size is lower than the number of currently stored dirents,
* some dirents will be dropped from the cache to respect the new size.
*
* @param nb_dirents The maximum number of dirents stored in the cache.
*/
void set_dirent_cache_max_size(size_t nb_dirents);

/** Get the maximum size of the dirent lookup cache.
*
* The returned value is either the default size or the last size set.
* This may not correspond to the actual size of the dirent lookup cache.
* See `set_dirent_lookup_cache_max_size` for more information.
*
* @return The maximum number of sub ranges created in the lookup cache.
*/
size_t get_dirent_lookup_cache_max_size() const;

/** Set the maximum size of the dirent lookup cache.
*
* Contrary to the other `set_<foo>_cache_max_size` methods, this method has
* no effect once the lookup cache is created.
* The lookup cache is created at the first access to an entry in the archive.
* So this method must be called before any access to content (including metadata).
* It is best to call this method first, just after the archive creation.
*
* @param nb_ranges The maximum number of sub ranges created in the lookup cache.
*/
void set_dirent_lookup_cache_max_size(size_t nb_ranges);

#ifdef ZIM_PRIVATE
cluster_index_type getClusterCount() const;
offset_type getClusterOffset(cluster_index_type idx) const;
Expand Down
41 changes: 41 additions & 0 deletions src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,47 @@ namespace zim
return m_impl->hasNewNamespaceScheme();
}

// Returns the cluster cache maximum size as reported by the underlying
// FileImpl (m_impl); no logic is added at the Archive level.
size_t Archive::get_cluster_cache_max_size() const
{
return m_impl->get_cluster_cache_max_size();
}

// Returns the current cluster cache size as reported by the underlying
// FileImpl (m_impl); no logic is added at the Archive level.
size_t Archive::get_cluster_cache_current_size() const
{
return m_impl->get_cluster_cache_current_size();
}

// Forwards the requested cluster cache maximum size (nb_clusters) to the
// underlying FileImpl (m_impl).
void Archive::set_cluster_cache_max_size(size_t nb_clusters)
{
m_impl->set_cluster_cache_max_size(nb_clusters);
}

// Returns the dirent cache maximum size as reported by the underlying
// FileImpl (m_impl); no logic is added at the Archive level.
size_t Archive::get_dirent_cache_max_size() const
{
return m_impl->get_dirent_cache_max_size();
}

// Returns the current dirent cache size as reported by the underlying
// FileImpl (m_impl); no logic is added at the Archive level.
size_t Archive::get_dirent_cache_current_size() const
{
return m_impl->get_dirent_cache_current_size();
}

// Forwards the requested dirent cache maximum size (nb_dirents) to the
// underlying FileImpl (m_impl).
void Archive::set_dirent_cache_max_size(size_t nb_dirents)
{
m_impl->set_dirent_cache_max_size(nb_dirents);
}


// Returns the dirent lookup cache maximum size as reported by the underlying
// FileImpl (m_impl); no logic is added at the Archive level.
size_t Archive::get_dirent_lookup_cache_max_size() const
{
return m_impl->get_dirent_lookup_cache_max_size();
}

// Forwards the requested dirent lookup cache maximum size (nb_ranges) to the
// underlying FileImpl (m_impl).
void Archive::set_dirent_lookup_cache_max_size(size_t nb_ranges)
{
m_impl->set_dirent_lookup_cache_max_size(nb_ranges);
}

cluster_index_type Archive::getClusterCount() const
{
return cluster_index_type(m_impl->getCountClusters());
Expand Down
5 changes: 1 addition & 4 deletions src/compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,14 @@

#include "compression.h"

#include "envvalue.h"

#include <zim/tools.h>
#include <stdexcept>

const std::string LZMA_INFO::name = "lzma";
void LZMA_INFO::init_stream_decoder(stream_t* stream, char* raw_data)
{
*stream = LZMA_STREAM_INIT;
unsigned memsize = zim::envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024);
auto errcode = lzma_stream_decoder(stream, memsize, 0);
auto errcode = lzma_stream_decoder(stream, LZMA_MEMORY_SIZE * 1024 * 1024, 0);
if (errcode != LZMA_OK) {
throw std::runtime_error("Impossible to allocated needed memory to uncompress lzma stream");
}
Expand Down
18 changes: 17 additions & 1 deletion src/concurrent_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "lrucache.h"

#include <cstddef>
#include <future>
#include <mutex>

Expand Down Expand Up @@ -84,9 +85,24 @@ class ConcurrentCache
return impl_.drop(key);
}

// Returns the maximum size reported by the wrapped cache implementation.
// The cache mutex is held for the duration of the call.
size_t get_max_size() const {
  const std::lock_guard<std::mutex> guard(lock_);
  return impl_.get_max_size();
}

// Returns the current size reported by the wrapped cache implementation
// (impl_.size()). The cache mutex is held for the duration of the call.
size_t get_current_size() const {
  const std::lock_guard<std::mutex> guard(lock_);
  return impl_.size();
}

// Changes the maximum size of the wrapped cache implementation to new_size.
// The cache mutex is held for the duration of the call.
void set_max_size(size_t new_size) {
  const std::lock_guard<std::mutex> guard(lock_);
  // No `return` here: this function is void, and returning the (void) result
  // of the call only suggested that a value was being propagated.
  impl_.set_max_size(new_size);
}

private: // data
Impl impl_;
std::mutex lock_;
mutable std::mutex lock_;
};

} // namespace zim
Expand Down
3 changes: 1 addition & 2 deletions src/dirent_accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "direntreader.h"
#include "_dirent.h"
#include "envvalue.h"

#include <mutex>

Expand All @@ -36,7 +35,7 @@ DirectDirentAccessor::DirectDirentAccessor(
: mp_direntReader(direntReader),
mp_pathPtrReader(std::move(pathPtrReader)),
m_direntCount(direntCount),
m_direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
m_direntCache(DIRENT_CACHE_SIZE),
m_bufferDirentZone(256)
{}

Expand Down
5 changes: 5 additions & 0 deletions src/dirent_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "lrucache.h"
#include "config.h"

#include <cstddef>
#include <memory>
#include <mutex>
#include <vector>
Expand Down Expand Up @@ -54,6 +55,10 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;
entry_index_t getDirentCount() const { return m_direntCount; }

// Returns the maximum size of the dirent cache, as reported by m_direntCache.
size_t get_max_cache_size() const { return m_direntCache.get_max_size(); }
// Returns the current size of the dirent cache, as reported by m_direntCache.
// NOTE(review): no lock is taken here; confirm whether m_direntCache needs
// external synchronization when read concurrently with getDirent().
size_t get_current_cache_size() const { return m_direntCache.size(); }
// Sets the maximum size of the dirent cache to nb_dirents.
// NOTE(review): declared const although it resizes m_direntCache, so the
// member is presumably declared mutable; confirm. No lock is taken here either.
void set_max_cache_size(size_t nb_dirents) const { m_direntCache.set_max_size(nb_dirents); }

private: // functions
std::shared_ptr<const Dirent> readDirent(offset_t) const;

Expand Down
13 changes: 11 additions & 2 deletions src/dirent_lookup.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,13 @@ class DirentLookup

public: // functions
explicit DirentLookup(const DirentAccessor* _direntAccessor);
// Virtual destructor: this class declares virtual members (getSize, find)
// and serves as the base class of FastDirentLookup.
virtual ~DirentLookup() = default;

index_t getNamespaceRangeBegin(char ns) const;
index_t getNamespaceRangeEnd(char ns) const;
// Size of the lookup structure. This base implementation always returns 0;
// FastDirentLookup provides its own implementation.
virtual size_t getSize() const { return 0; }

Result find(char ns, const std::string& key) const;
// Looks up the dirent identified by namespace `ns` and `key`.
// Virtual so that FastDirentLookup can provide its own implementation.
virtual Result find(char ns, const std::string& key) const;

protected: // functions
int compareWithDirentAt(char ns, const std::string& key, entry_index_type i) const;
Expand Down Expand Up @@ -83,7 +85,8 @@ class FastDirentLookup : public DirentLookup<TConfig>
public: // functions
FastDirentLookup(const DirentAccessor* _direntAccessor, entry_index_type cacheEntryCount);

typename BaseType::Result find(char ns, const std::string& key) const;
virtual size_t getSize() const;
virtual typename BaseType::Result find(char ns, const std::string& key) const;

private: // functions
std::string getDirentKey(entry_index_type i) const;
Expand Down Expand Up @@ -204,6 +207,12 @@ DirentLookup<TConfig>::find(char ns, const std::string& key) const
return findInRange(0, direntCount, ns, key);
}

// Returns the size reported by the lookup grid (lookupGrid.getSize()),
// in place of the base class implementation which returns 0.
template<typename TConfig>
size_t FastDirentLookup<TConfig>::getSize() const {
return lookupGrid.getSize();
}


template<typename TConfig>
typename DirentLookup<TConfig>::Result
DirentLookup<TConfig>::findInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const
Expand Down
58 changes: 0 additions & 58 deletions src/envvalue.cpp

This file was deleted.

29 changes: 0 additions & 29 deletions src/envvalue.h

This file was deleted.

Loading
Loading