Skip to content

Commit

Permalink
Add test for table writer special partition name (facebookincubator#5427)
Browse files Browse the repository at this point in the history

Summary: Pull Request resolved: facebookincubator#5427

Reviewed By: xiaoxmeng

Differential Revision: D47112961

Pulled By: kewang1024

fbshipit-source-id: 3ca6fb2beb56eafe0b46eccf8fc36fdf6484a545
  • Loading branch information
kewang1024 authored and facebook-github-bot committed Jun 29, 2023
1 parent 0a36493 commit b69c0fd
Showing 1 changed file with 103 additions and 0 deletions.
103 changes: 103 additions & 0 deletions velox/exec/tests/TableWriteTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,109 @@ TEST_P(AllTableWriterTest, commitStrategies) {
}
}

// Writes a table partitioned by (p0, p1) where every p1 value ends with a
// character from 'charsToEscape', then checks that the leaf directories
// produced under the output directory carry the percent-encoded form of that
// character (for example '#' is expected to appear as "%23").
TEST_P(PartitionedTableWriterTest, specialPartitionName) {
  const int32_t numPartitions = 50;
  const int32_t numBatches = 2;

  const auto rowType =
      ROW({"c0", "p0", "p1", "c1", "c3", "c5"},
          {INTEGER(), INTEGER(), VARCHAR(), BIGINT(), REAL(), VARCHAR()});
  const std::vector<std::string> partitionKeys = {"p0", "p1"};

  // Characters appended to the p1 partition value. Entry i of this table
  // corresponds to entry i of 'expectedCharsAfterEscape' below.
  const std::vector<char> charsToEscape = {
      '"',
      '#',
      '%',
      '\'',
      '*',
      '/',
      ':',
      '=',
      '?',
      '\\',
      '\x7F',
      '{',
      '[',
      ']',
      '^'};
  // Percent-encoded ("%" followed by the two-digit hex character code) form
  // expected in the directory name for each entry of 'charsToEscape'.
  const std::vector<std::string> expectedCharsAfterEscape = {
      "%22",
      "%23",
      "%25",
      "%27",
      "%2A",
      "%2F",
      "%3A",
      "%3D",
      "%3F",
      "%5C",
      "%7F",
      "%7B",
      "%5B",
      "%5D",
      "%5E"};
  // The two tables are indexed in lockstep, so they must have equal length.
  ASSERT_EQ(charsToEscape.size(), expectedCharsAfterEscape.size());
  const auto numSpecialChars = charsToEscape.size();
  // Need at least one partition per special character so that each of them
  // is exercised at least once.
  ASSERT_GE(static_cast<size_t>(numPartitions), numSpecialChars);

  std::vector<RowVectorPtr> vectors = makeBatches(numBatches, [&](auto) {
    return makeRowVector(
        rowType->names(),
        {
            // c0
            makeFlatVector<int32_t>(
                numPartitions, [&](auto row) { return row + 100; }),
            // p0: the row number, so every row has a distinct p0 value.
            makeFlatVector<int32_t>(
                numPartitions, [&](auto row) { return row; }),
            // p1: "str_<row>" followed by one special character, cycling
            // through 'charsToEscape'.
            // NOTE(review): makeInline presumably requires the string to fit
            // StringView's inline storage; the values built here are short -
            // confirm against StringView.
            makeFlatVector<StringView>(
                numPartitions,
                [&](auto row) {
                  return StringView::makeInline(fmt::format(
                      "str_{}{}",
                      row,
                      charsToEscape.at(row % numSpecialChars)));
                }),
            // c1
            makeFlatVector<int64_t>(
                numPartitions, [&](auto row) { return row + 1000; }),
            // c3
            makeFlatVector<float>(
                numPartitions, [&](auto row) { return row + 33.23; }),
            // c5
            makeFlatVector<StringView>(
                numPartitions,
                [&](auto row) {
                  return StringView::makeInline(
                      fmt::format("bucket_{}", row * 3));
                }),
        });
  });
  createDuckDbTable(vectors);

  // Write each batch to its own input file for the table scan to read.
  auto inputFilePaths = makeFilePaths(numBatches);
  for (int32_t i = 0; i < numBatches; ++i) {
    writeToFile(inputFilePaths[i]->path, vectors[i]);
  }

  auto outputDirectory = TempDirectoryPath::create();
  auto plan = createInsertPlan(
      PlanBuilder().tableScan(rowType),
      rowType,
      outputDirectory->path,
      partitionKeys,
      bucketProperty_,
      connector::hive::LocationHandle::TableType::kNew,
      commitStrategy_);

  auto task = assertQuery(plan, inputFilePaths, "SELECT count(*) FROM tmp");

  std::set<std::string> actualPartitionDirectories =
      getLeafSubdirectories(outputDirectory->path);

  // Build the expected "p0=<i>/p1=str_<i><encoded char>" directory for every
  // partition, using the same cycling index as the input data above.
  std::set<std::string> expectedPartitionDirectories;
  for (int32_t i = 0; i < numPartitions; ++i) {
    auto partitionName = fmt::format(
        "p0={}/p1=str_{}{}",
        i,
        i,
        expectedCharsAfterEscape.at(i % numSpecialChars));
    expectedPartitionDirectories.emplace(
        fs::path(outputDirectory->path) / partitionName);
  }
  EXPECT_EQ(actualPartitionDirectories, expectedPartitionDirectories);
}

TEST_P(PartitionedTableWriterTest, multiplePartitions) {
int32_t numPartitions = 50;
int32_t numBatches = 2;
Expand Down

0 comments on commit b69c0fd

Please sign in to comment.