diff --git a/cloud/src/recycler/hdfs_accessor.cpp b/cloud/src/recycler/hdfs_accessor.cpp index ffbc5788486b64..5394c39c12245e 100644 --- a/cloud/src/recycler/hdfs_accessor.cpp +++ b/cloud/src/recycler/hdfs_accessor.cpp @@ -368,6 +368,14 @@ int HdfsAccessor::init() { return 0; } + // Currently, the HDFS accessor deletes files on HDFS by path prefix. + // The format of the prefix parameter is data/{tablet_id}/{rowset_prefix}, + // for example: "data/492211/02000000008a012957476a3e174dfdaa71ee5f80a3abafa3_". + // Since the HDFS C++ SDK doesn't provide an interface for deleting by prefix, + // we need to extract the tablet id path from the given prefix, + // traverse all files in the tablet id path, and delete the files that match the prefix. + // This implementation is not ideal because the HDFS accessor needs to be aware of the path structure. + // We will optimize this in the future. int HdfsAccessor::delete_prefix(const std::string& path_prefix, int64_t expiration_time) { auto uri = to_uri(path_prefix); LOG(INFO) << "delete prefix, uri=" << uri; diff --git a/cloud/test/hdfs_accessor_test.cpp b/cloud/test/hdfs_accessor_test.cpp index d0ea8a4ae0517e..72188e4c339a24 100644 --- a/cloud/test/hdfs_accessor_test.cpp +++ b/cloud/test/hdfs_accessor_test.cpp @@ -277,14 +277,12 @@ TEST(HdfsAccessorTest, delete_prefix) { EXPECT_TRUE(list_files.contains("data/20000/1_0.dat")); EXPECT_TRUE(list_files.contains("data111/10000/1_0.dat")); - ret = accessor.delete_prefix("data/10000"); - EXPECT_EQ(ret, -1); - ret = accessor.delete_prefix("data111/10000"); - EXPECT_EQ(ret, -1); + ret = accessor.delete_prefix("data/10000/1_"); + EXPECT_EQ(ret, 0); + ret = accessor.delete_prefix("data/10000/2_"); + EXPECT_EQ(ret, 0); ret = accessor.delete_prefix("data/20000/1_"); EXPECT_EQ(ret, 0); - ret = accessor.delete_prefix("data/10000/1"); - EXPECT_EQ(ret, -1); iter.reset(); ret = accessor.list_all(&iter); @@ -294,8 +292,8 @@ TEST(HdfsAccessorTest, delete_prefix) { 
list_files.insert(std::move(file->path)); } EXPECT_EQ(list_files.size(), 3); - EXPECT_TRUE(list_files.contains("data/10000/20000/1_0.dat")); EXPECT_TRUE(list_files.contains("data/10000/20000/30000/1_0.dat")); + EXPECT_TRUE(list_files.contains("data/10000/20000/1_0.dat")); EXPECT_TRUE(list_files.contains("data111/10000/1_0.dat")); }