Skip to content

Commit

Permalink
issue-1146: load NodeRefs upon tablet load (#2241)
Browse files Browse the repository at this point in the history
* issue-1146: load NodeRefs upon tablet load
  • Loading branch information
debnatkh authored Oct 16, 2024
1 parent fcae5e1 commit fc27cb9
Show file tree
Hide file tree
Showing 20 changed files with 542 additions and 52 deletions.
2 changes: 2 additions & 0 deletions cloud/filestore/config/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,8 @@ message TStorageConfig
reserved 375; // InMemoryIndexCacheNodeAttrsVerCapacity
optional uint64 InMemoryIndexCacheNodeRefsCapacity = 376;
reserved 377; // InMemoryIndexCacheNodeRefsVerCapacity
optional bool InMemoryIndexCacheLoadOnTabletStart = 398;
optional uint64 InMemoryIndexCacheLoadOnTabletStartRowsPerTx = 399;

// Used to send non-network metrics as network ones to HIVE,
// while we use them for load balancing
Expand Down
3 changes: 3 additions & 0 deletions cloud/filestore/libs/storage/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases;
xxx(InMemoryIndexCacheNodesCapacity, ui64, 0 )\
xxx(InMemoryIndexCacheNodeAttrsCapacity, ui64, 0 )\
xxx(InMemoryIndexCacheNodeRefsCapacity, ui64, 0 )\
xxx(InMemoryIndexCacheLoadOnTabletStart, bool, false )\
xxx(InMemoryIndexCacheLoadOnTabletStartRowsPerTx, ui64, 1000000 )\
\
xxx(NonNetworkMetricsBalancingFactor, ui32, 1_KB )\
\
xxx(AsyncDestroyHandleEnabled, bool, false )\
Expand Down
2 changes: 2 additions & 0 deletions cloud/filestore/libs/storage/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ class TStorageConfig
ui64 GetInMemoryIndexCacheNodesCapacity() const;
ui64 GetInMemoryIndexCacheNodeAttrsCapacity() const;
ui64 GetInMemoryIndexCacheNodeRefsCapacity() const;
bool GetInMemoryIndexCacheLoadOnTabletStart() const;
ui64 GetInMemoryIndexCacheLoadOnTabletStartRowsPerTx() const;

bool GetAsyncDestroyHandleEnabled() const;
TDuration GetAsyncHandleOperationPeriod() const;
Expand Down
4 changes: 2 additions & 2 deletions cloud/filestore/libs/storage/service/service_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5820,11 +5820,11 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest)
break;
}
case TEvIndexTabletPrivate::
EvLoadCompactionMapChunkCompleted: {
EvLoadCompactionMapChunkResponse: {
lastCompactionMapRangeId = Max(
event
->Get<TEvIndexTabletPrivate::
TEvLoadCompactionMapChunkCompleted>()
TEvLoadCompactionMapChunkResponse>()
->LastRangeId,
lastCompactionMapRangeId);
break;
Expand Down
3 changes: 3 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,9 @@ STFUNC(TIndexTabletActor::StateWork)
HFunc(TEvIndexTabletPrivate::TEvReadDataCompleted, HandleReadDataCompleted);
HFunc(TEvIndexTabletPrivate::TEvWriteDataCompleted, HandleWriteDataCompleted);
HFunc(TEvIndexTabletPrivate::TEvAddDataCompleted, HandleAddDataCompleted);
HFunc(
TEvIndexTabletPrivate::TEvLoadCompactionMapChunkResponse,
HandleLoadCompactionMapChunkResponse);

HFunc(TEvIndexTabletPrivate::TEvUpdateCounters, HandleUpdateCounters);
HFunc(TEvIndexTabletPrivate::TEvUpdateLeakyBucketCounters, HandleUpdateLeakyBucketCounters);
Expand Down
15 changes: 15 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,17 @@ class TIndexTabletActor final

TVector<ui32> GenerateForceDeleteZeroCompactionRanges() const;

/**
 * @brief Starts one iteration of loading NodeRefs into the in-memory cache.
 *
 * Each call executes a transaction that reads a bounded batch of NodeRefs
 * beginning at the (nodeId, name) position. The transaction re-invokes this
 * method with the next position until no rows remain, after which the
 * nodeRefs cache can be treated as exhaustive (e.g. to serve ListNodes from
 * the in-memory index state).
 *
 * @param ctx    actor context used for logging and for executing the tx
 * @param nodeId NodeId to start reading from; the initial call passes 0
 * @param name   ref name to start reading from; the initial call passes ""
 */
void LoadNodeRefs(
const NActors::TActorContext& ctx,
ui64 nodeId,
const TString& name);

void AddTransaction(
TRequestInfo& transaction,
TRequestInfo::TCancelRoutine cancelRoutine);
Expand Down Expand Up @@ -602,6 +613,10 @@ class TIndexTabletActor final
const TEvIndexTabletPrivate::TEvNodeUnlinkedInShard::TPtr& ev,
const NActors::TActorContext& ctx);

void HandleLoadCompactionMapChunkResponse(
const TEvIndexTabletPrivate::TEvLoadCompactionMapChunkResponse::TPtr& ev,
const NActors::TActorContext& ctx);

void SendMetricsToExecutor(const NActors::TActorContext& ctx);

bool HandleRequests(STFUNC_SIG);
Expand Down
20 changes: 10 additions & 10 deletions cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,13 @@ void TIndexTabletActor::CompleteTx_LoadState(

LOG_INFO_S(ctx, TFileStoreComponents::TABLET,
LogTag << " Scheduling startup events");

if (Config->GetInMemoryIndexCacheEnabled() &&
Config->GetInMemoryIndexCacheLoadOnTabletStart())
{
LoadNodeRefs(ctx, 0, "");
}

ScheduleSyncSessions(ctx);
ScheduleCleanupSessions(ctx);
RestartCheckpointDestruction(ctx);
Expand Down Expand Up @@ -373,8 +380,8 @@ void TIndexTabletActor::LoadNextCompactionMapChunkIfNeeded(

////////////////////////////////////////////////////////////////////////////////

void TIndexTabletActor::HandleLoadCompactionMapChunkCompleted(
const TEvIndexTabletPrivate::TEvLoadCompactionMapChunkCompleted::TPtr& ev,
void TIndexTabletActor::HandleLoadCompactionMapChunkResponse(
const TEvIndexTabletPrivate::TEvLoadCompactionMapChunkResponse::TPtr& ev,
const TActorContext& ctx)
{
const auto* msg = ev->Get();
Expand Down Expand Up @@ -488,18 +495,11 @@ void TIndexTabletActor::CompleteTx_LoadCompactionMapChunk(
}

using TNotification =
TEvIndexTabletPrivate::TEvLoadCompactionMapChunkCompleted;
TEvIndexTabletPrivate::TEvLoadCompactionMapChunkResponse;
auto notification = std::make_unique<TNotification>(
args.FirstRangeId,
args.LastRangeId);
NCloud::Send(ctx, SelfId(), std::move(notification));

if (args.RequestInfo->Sender != ctx.SelfID) {
using TResponse =
TEvIndexTabletPrivate::TEvLoadCompactionMapChunkResponse;
auto response = std::make_unique<TResponse>();
NCloud::Reply(ctx, *args.RequestInfo, std::move(response));
}
}

} // namespace NCloud::NFileStore::NStorage
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "tablet_actor.h"

namespace NCloud::NFileStore::NStorage {

using namespace NActors;

////////////////////////////////////////////////////////////////////////////////

/**
 * @brief Starts one iteration of the NodeRefs preload.
 *
 * Executes a TLoadNodeRefs transaction that reads a batch of NodeRefs rows
 * starting from the (nodeId, name) position. The batch size is taken from
 * InMemoryIndexCacheLoadOnTabletStartRowsPerTx and is clamped to at least 1.
 *
 * @param ctx    actor context used for logging and for executing the tx
 * @param nodeId NodeId to start reading from
 * @param name   ref name to start reading from
 */
void TIndexTabletActor::LoadNodeRefs(
    const NActors::TActorContext& ctx,
    ui64 nodeId,
    const TString& name)
{
    ui64 maxNodeRefs =
        Config->GetInMemoryIndexCacheLoadOnTabletStartRowsPerTx();
    if (maxNodeRefs == 0) {
        // A zero batch size would make the transaction read no rows and
        // report its own start position as the next one, so the
        // LoadNodeRefs -> CompleteTx_LoadNodeRefs chain would never make
        // progress. Always read at least one row per transaction.
        maxNodeRefs = 1;
    }

    LOG_INFO(
        ctx,
        TFileStoreComponents::TABLET,
        "%s LoadNodeRefs iteration started (nodeId: %lu, name: %s, "
        "maxNodeRefs: %lu)",
        LogTag.c_str(),
        nodeId,
        name.c_str(),
        maxNodeRefs);

    ExecuteTx<TLoadNodeRefs>(
        ctx,
        nodeId,
        name,
        maxNodeRefs);
}

////////////////////////////////////////////////////////////////////////////////

/**
 * Validation stage of the TLoadNodeRefs transaction.
 *
 * There is nothing to validate: the stage only logs the parameters of the
 * iteration and always lets the transaction proceed.
 */
bool TIndexTabletActor::ValidateTx_LoadNodeRefs(
    const TActorContext& ctx,
    TTxIndexTablet::TLoadNodeRefs& args)
{
    const auto& startCookie = args.Cookie;

    LOG_INFO(
        ctx,
        TFileStoreComponents::TABLET,
        "%s LoadingNodeRefs (nodeId: %lu, name: %s, maxNodeRefs: %lu)",
        LogTag.c_str(),
        args.NodeId,
        startCookie.c_str(),
        args.MaxNodeRefs);

    // Unconditionally accepted.
    return true;
}

bool TIndexTabletActor::PrepareTx_LoadNodeRefs(
const TActorContext& ctx,
IIndexTabletDatabase& db,
TTxIndexTablet::TLoadNodeRefs& args)
{
TVector<TIndexTabletDatabase::TNodeRef> nodeRefs;

bool ready = db.ReadNodeRefs(
args.NodeId,
args.Cookie,
args.MaxNodeRefs,
nodeRefs,
args.NextNodeId,
args.NextCookie);

LOG_INFO(
ctx,
TFileStoreComponents::TABLET,
"%s LoadingNodeRefs (nodeId: %lu, name: %s, maxNodeRefs: %lu), read "
"%lu nodeRefs: %s",
LogTag.c_str(),
args.NodeId,
args.Cookie.c_str(),
args.MaxNodeRefs,
nodeRefs.size(),
ready ? "finished" : "restarted");

return ready;
}

/**
 * Completion stage of the TLoadNodeRefs transaction.
 *
 * If the read stage reported a non-empty next position, schedules the next
 * iteration from it; otherwise marks the NodeRefs load as complete.
 */
void TIndexTabletActor::CompleteTx_LoadNodeRefs(
    const TActorContext& ctx,
    TTxIndexTablet::TLoadNodeRefs& args)
{
    LOG_INFO(
        ctx,
        TFileStoreComponents::TABLET,
        "%s LoadNodeRefs iteration completed, next nodeId: %lu, next cookie: "
        "%s",
        LogTag.c_str(),
        args.NextNodeId,
        args.NextCookie.c_str());

    // A zero node id together with an empty cookie means there is nothing
    // left to read.
    const bool hasMore = args.NextNodeId != 0 || !args.NextCookie.empty();
    if (!hasMore) {
        LOG_INFO(
            ctx,
            TFileStoreComponents::TABLET,
            "%s LoadNodeRefs completed",
            LogTag.c_str());

        MarkNodeRefsLoadComplete();
        return;
    }

    LoadNodeRefs(ctx, args.NextNodeId, args.NextCookie);
}

} // namespace NCloud::NFileStore::NStorage
97 changes: 83 additions & 14 deletions cloud/filestore/libs/storage/tablet/tablet_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,50 @@ bool TIndexTabletDatabase::ReadNodeRefs(
return true;
}

/**
 * Reads up to maxCount rows of the NodeRefs table, starting from the first
 * key that is greater than or equal to (startNodeId, startCookie), and
 * appends them to refs.
 *
 * @param startNodeId  NodeId part of the start key
 * @param startCookie  Name part of the start key
 * @param maxCount     maximum number of rows to append to refs
 * @param refs         output: the rows that were read (appended)
 * @param nextNodeId   output: NodeId of the first row that was not read, or 0
 *                     if the end of the table was reached
 * @param nextCookie   output: Name of the first row that was not read, or an
 *                     empty string if the end of the table was reached
 * @return true on success; false if the iterator reported that the data is
 *         not ready. In the latter case refs may hold a partially read batch
 *         and nextNodeId / nextCookie are left untouched.
 */
bool TIndexTabletDatabase::ReadNodeRefs(
    ui64 startNodeId,
    const TString& startCookie,
    ui64 maxCount,
    TVector<IIndexTabletDatabase::TNodeRef>& refs,
    ui64& nextNodeId,
    TString& nextCookie)
{
    using TTable = TIndexTabletSchema::NodeRefs;

    // Precharge the table only for a scan that starts from the very
    // beginning; the readiness of the data is checked on the iterator below.
    if (!startNodeId && startCookie.empty()) {
        Table<TTable>().Precharge();
    }

    auto it = Table<TTable>().GreaterOrEqual(startNodeId, startCookie).Select();

    if (!it.IsReady()) {
        return false;   // not ready
    }

    while (it.IsValid() && maxCount > 0) {
        refs.emplace_back(TNodeRef{
            it.GetValue<TTable::NodeId>(),
            it.GetValue<TTable::Name>(),
            it.GetValue<TTable::ChildId>(),
            it.GetValue<TTable::ShardId>(),
            it.GetValue<TTable::ShardName>(),
            it.GetValue<TTable::CommitId>(),
            InvalidCommitId});
        --maxCount;

        if (!it.Next()) {
            return false;   // not ready
        }
    }

    if (it.IsValid()) {
        // More rows remain: report where the next batch should start.
        nextNodeId = it.GetValue<TTable::NodeId>();
        nextCookie = it.GetValue<TTable::Name>();
    } else {
        // End of the table: explicitly report "no next position" instead of
        // leaving whatever the caller had in the output arguments. This is
        // done after the last use of startCookie, so it is safe even if the
        // caller passed the same string for startCookie and nextCookie.
        nextNodeId = 0;
        nextCookie.clear();
    }

    return true;
}

bool TIndexTabletDatabase::PrechargeNodeRefs(
ui64 nodeId,
const TString& cookie,
Expand Down Expand Up @@ -2030,13 +2074,7 @@ bool TIndexTabletDatabaseProxy::ReadNodeRef(
if (result && ref) {
// If ReadNodeRef was successful, it is reasonable to update the cache
// with the value that has just been read.
NodeUpdates.emplace_back(TInMemoryIndexState::TWriteNodeRefsRequest{
.NodeRefsKey = {nodeId, name},
.NodeRefsRow = {
.CommitId = ref->MinCommitId,
.ChildId = ref->ChildNodeId,
.ShardId = ref->ShardId,
.ShardName = ref->ShardName}});
NodeUpdates.emplace_back(ExtractWriteNodeRefsFromNodeRef(*ref));
}
return result;
}
Expand All @@ -2055,13 +2093,32 @@ bool TIndexTabletDatabaseProxy::ReadNodeRefs(
// If ReadNodeRefs was successful, it is reasonable to update the cache
// with the values that have just been read.
for (const auto& ref: refs) {
NodeUpdates.emplace_back(TInMemoryIndexState::TWriteNodeRefsRequest{
.NodeRefsKey = {nodeId, ref.Name},
.NodeRefsRow = {
.CommitId = ref.MinCommitId,
.ChildId = ref.ChildNodeId,
.ShardId = ref.ShardId,
.ShardName = ref.ShardName}});
NodeUpdates.emplace_back(ExtractWriteNodeRefsFromNodeRef(ref));
}
}
return result;
}

bool TIndexTabletDatabaseProxy::ReadNodeRefs(
ui64 startNodeId,
const TString& startCookie,
ui64 maxCount,
TVector<IIndexTabletDatabase::TNodeRef>& refs,
ui64& nextNodeId,
TString& nextCookie)
{
auto result = TIndexTabletDatabase::ReadNodeRefs(
startNodeId,
startCookie,
maxCount,
refs,
nextNodeId,
nextCookie);
if (result) {
// If ReadNodeRefs was successful, it is reasonable to update the cache
// with the values that have just been read.
for (const auto& ref: refs) {
NodeUpdates.emplace_back(ExtractWriteNodeRefsFromNodeRef(ref));
}
}
return result;
Expand Down Expand Up @@ -2127,4 +2184,16 @@ void TIndexTabletDatabaseProxy::DeleteNodeRefVer(
// TODO(#1146): _Ver tables not yet supported
}

/**
 * Converts a NodeRef read from the database into the request that writes
 * the same ref into the in-memory index state. The row's CommitId is taken
 * from ref.MinCommitId.
 */
TInMemoryIndexState::TWriteNodeRefsRequest
TIndexTabletDatabaseProxy::ExtractWriteNodeRefsFromNodeRef(const TNodeRef& ref)
{
    TInMemoryIndexState::TWriteNodeRefsRequest request{
        .NodeRefsKey = {ref.NodeId, ref.Name},
        .NodeRefsRow = {
            .CommitId = ref.MinCommitId,
            .ChildId = ref.ChildNodeId,
            .ShardId = ref.ShardId,
            .ShardName = ref.ShardName}};
    return request;
}

} // namespace NCloud::NFileStore::NStorage
19 changes: 19 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,14 @@ FILESTORE_FILESYSTEM_STATS(FILESTORE_DECLARE_STATS)
ui32 maxBytes,
TString* next = nullptr) override;

// Reads up to maxCount NodeRefs rows starting from the first key that is
// greater than or equal to (startNodeId, startCookie) and appends them to
// refs. When more rows remain, nextNodeId / nextCookie receive the key of the
// first row that was not read. Returns false if the data is not ready.
virtual bool ReadNodeRefs(
ui64 startNodeId,
const TString& startCookie,
ui64 maxCount,
TVector<IIndexTabletDatabase::TNodeRef>& refs,
ui64& nextNodeId,
TString& nextCookie) override;

virtual bool PrechargeNodeRefs(
ui64 nodeId,
const TString& cookie,
Expand Down Expand Up @@ -611,6 +619,14 @@ class TIndexTabletDatabaseProxy: public TIndexTabletDatabase
ui32 maxBytes,
TString* next = nullptr) override;

// Forwards to TIndexTabletDatabase::ReadNodeRefs with the same arguments.
// If the read succeeds, additionally appends a write-NodeRefs request to
// NodeUpdates for every entry in refs. Returns the result of the base call.
bool ReadNodeRefs(
ui64 startNodeId,
const TString& startCookie,
ui64 maxCount,
TVector<IIndexTabletDatabase::TNodeRef>& refs,
ui64& nextNodeId,
TString& nextCookie) override;

void WriteNodeRef(
ui64 nodeId,
ui64 commitId,
Expand Down Expand Up @@ -641,6 +657,9 @@ class TIndexTabletDatabaseProxy: public TIndexTabletDatabase

private:
TVector<TInMemoryIndexState::TIndexStateRequest>& NodeUpdates;

// Builds the in-memory index write request (key: NodeId + Name) that
// corresponds to a NodeRef read from the database.
static TInMemoryIndexState::TWriteNodeRefsRequest
ExtractWriteNodeRefsFromNodeRef(const TNodeRef& ref);
};

} // namespace NCloud::NFileStore::NStorage
Loading

0 comments on commit fc27cb9

Please sign in to comment.