Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 2369 - Report on whether partitions will be split #2378

Merged
merged 2 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,31 @@
import sleeper.core.schema.Field;
import sleeper.core.schema.Schema;
import sleeper.core.statestore.FileReference;
import sleeper.splitter.FindPartitionsToSplit;
import sleeper.splitter.PartitionSplitCheck;

import java.util.List;
import java.util.stream.Collectors;
import java.util.Set;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toSet;
import static java.util.stream.Collectors.toUnmodifiableList;

public class PartitionStatus {

private final Partition partition;
private final List<FileReference> filesInPartition;
private final boolean needsSplitting;
private final boolean willBeSplit;
private final boolean maySplitIfCompacted;
private final Field splitField;
private final Object splitValue;
private final Integer indexInParent;

private PartitionStatus(Builder builder) {
partition = builder.partition;
filesInPartition = builder.filesInPartition;
needsSplitting = builder.needsSplitting;
willBeSplit = builder.willBeSplit;
maySplitIfCompacted = builder.maySplitIfCompacted;
splitField = builder.splitField;
splitValue = builder.splitValue;
indexInParent = builder.indexInParent;
Expand All @@ -48,21 +55,28 @@ private PartitionStatus(Builder builder) {
static PartitionStatus from(
TableProperties tableProperties, PartitionTree tree, Partition partition, List<FileReference> activeFiles) {
Schema schema = tableProperties.getSchema();
List<FileReference> filesInPartition = activeFiles.stream()
.filter(fileReference -> fileReference.getPartitionId().equals(partition.getId()))
.collect(Collectors.toList());
boolean needsSplitting = PartitionSplitCheck.fromFilesInPartition(tableProperties, filesInPartition).isNeedsSplitting();
List<FileReference> filesInPartitionForSplit = FindPartitionsToSplit.getFilesInPartition(partition, activeFiles);
List<FileReference> filesInPartitionOrAncestors = getFilesInPartitionOrAncestors(partition, tree, activeFiles);
boolean willBeSplit = PartitionSplitCheck.fromFilesInPartition(tableProperties, filesInPartitionForSplit).isNeedsSplitting();
boolean maySplitIfCompacted = PartitionSplitCheck.fromFilesInPartition(tableProperties, filesInPartitionOrAncestors).isNeedsSplitting();
return builder().partition(partition)
.filesInPartition(filesInPartition)
.needsSplitting(needsSplitting)
.filesInPartition(activeFiles.stream()
.filter(file -> file.getPartitionId().equals(partition.getId()))
.collect(toUnmodifiableList()))
.willBeSplit(willBeSplit)
.maySplitIfCompacted(maySplitIfCompacted)
.splitField(splitField(partition, schema))
.splitValue(splitValue(partition, tree, schema))
.indexInParent(indexInParent(partition, tree))
.build();
}

public boolean isNeedsSplitting() {
return needsSplitting;
public boolean willBeSplit() {
return willBeSplit;
}

/**
 * Reports the "may split if compacted" flag held by this status.
 * <p>
 * The value is whatever was supplied to the builder. When built via {@code from}, it is the
 * result of the split check run against the files in this partition or any of its ancestors.
 *
 * @return the stored flag
 */
public boolean maySplitIfCompacted() {
return maySplitIfCompacted;
}

public boolean isLeafPartition() {
Expand Down Expand Up @@ -133,10 +147,22 @@ private static Integer indexInParent(Partition partition, PartitionTree tree) {
return parent.getChildPartitionIds().indexOf(partition.getId());
}

/**
 * Selects the file references that belong to the given partition or to any partition
 * returned by {@code tree.ancestorsOf(partition)}.
 *
 * @param partition      the partition whose own ID is always part of the match set
 * @param tree           the partition tree queried for the partition's ancestors
 * @param fileReferences the file references to filter
 * @return an unmodifiable list of the matching references, in their original order
 */
private static List<FileReference> getFilesInPartitionOrAncestors(
        Partition partition, PartitionTree tree, List<FileReference> fileReferences) {
    // Gather the IDs of this partition and every ancestor once, so each file is a set lookup.
    Stream<Partition> selfAndAncestors = Stream.concat(Stream.of(partition), tree.ancestorsOf(partition));
    Set<String> matchingIds = selfAndAncestors.map(Partition::getId).collect(toSet());
    return fileReferences.stream()
            .filter(reference -> matchingIds.contains(reference.getPartitionId()))
            .collect(toUnmodifiableList());
}

public static final class Builder {
private Partition partition;
private List<FileReference> filesInPartition;
private boolean needsSplitting;
private boolean willBeSplit;
private boolean maySplitIfCompacted;
private Field splitField;
private Object splitValue;
private Integer indexInParent;
Expand All @@ -154,8 +180,13 @@ public Builder filesInPartition(List<FileReference> filesInPartition) {
return this;
}

public Builder needsSplitting(boolean needsSplitting) {
this.needsSplitting = needsSplitting;
public Builder willBeSplit(boolean willBeSplit) {
this.willBeSplit = willBeSplit;
return this;
}

/**
 * Sets the "may split if compacted" flag that the built status will report.
 *
 * @param maySplitIfCompacted the flag value to store
 * @return this builder, to allow chained calls
 */
public Builder maySplitIfCompacted(boolean maySplitIfCompacted) {
this.maySplitIfCompacted = maySplitIfCompacted;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ public long getNumLeafPartitions() {
return partitions.stream().filter(PartitionStatus::isLeafPartition).count();
}

public long getNumLeafPartitionsThatNeedSplitting() {
public long getNumLeafPartitionsThatWillBeSplit() {
return partitions.stream()
.filter(PartitionStatus::isLeafPartition)
.filter(PartitionStatus::isNeedsSplitting)
.filter(PartitionStatus::willBeSplit)
.count();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public class PartitionsStatusReporter {
private static final TableField APPROX_RECORDS = BUILDER.addNumericField("APPROX_RECORDS");
private static final TableField KNOWN_RECORDS = BUILDER.addNumericField("KNOWN_RECORDS");
private static final TableField LEAF = BUILDER.addField("LEAF");
private static final TableField NEEDS_SPLITTING = BUILDER.addField("NEEDS_SPLITTING");
private static final TableField WILL_BE_SPLIT = BUILDER.addField("WILL_BE_SPLIT");
private static final TableField MAY_SPLIT_IF_COMPACTED = BUILDER.addField("MAY_SPLIT_IF_COMPACTED");
private static final TableField SPLIT_FIELD = BUILDER.addField("SPLIT_FIELD");
private static final TableField SPLIT_VALUE = BUILDER.addField("SPLIT_VALUE");
private static final TableWriterFactory TABLE_FACTORY = BUILDER.build();
Expand All @@ -53,7 +54,7 @@ public void report(PartitionsStatus status) {
out.println("Partitions Status Report:");
out.println("--------------------------");
out.println("There are " + status.getNumPartitions() + " partitions (" + status.getNumLeafPartitions() + " leaf partitions)");
out.println("There are " + status.getNumLeafPartitionsThatNeedSplitting() + " leaf partitions that need splitting");
out.println("There are " + status.getNumLeafPartitionsThatWillBeSplit() + " leaf partitions that will be split");
out.println("Split threshold is " + status.getSplitThreshold() + " records");
TABLE_FACTORY.tableBuilder()
.itemsAndWriter(status.getPartitions(), PartitionsStatusReporter::writeRow)
Expand All @@ -69,16 +70,24 @@ private static void writeRow(PartitionStatus status, TableRow.Builder builder) {
.value(APPROX_RECORDS, status.getApproxRecords())
.value(KNOWN_RECORDS, status.getKnownRecords())
.value(LEAF, partition.isLeafPartition() ? "yes" : "no")
.value(NEEDS_SPLITTING, needsSplittingString(status))
.value(WILL_BE_SPLIT, willBeSplitString(status))
.value(MAY_SPLIT_IF_COMPACTED, maySplitIfCompactedString(status))
.value(SPLIT_FIELD, StandardProcessRunReporter.getOrNull(status.getSplitField(), Field::getName))
.value(SPLIT_VALUE, splitValueString(status));
}

private static String needsSplittingString(PartitionStatus status) {
private static String willBeSplitString(PartitionStatus status) {
if (!status.isLeafPartition()) {
return null;
}
return status.isNeedsSplitting() ? "yes" : "no";
return status.willBeSplit() ? "yes" : "no";
}

/**
 * Renders the MAY_SPLIT_IF_COMPACTED cell for a partition row.
 * <p>
 * The cell only has a value for leaf partitions that are not already going to be split;
 * for every other partition it is left empty.
 *
 * @param status the partition status being written
 * @return {@code "yes"} or {@code "no"} when the cell applies, otherwise {@code null}
 */
private static String maySplitIfCompactedString(PartitionStatus status) {
    boolean cellApplies = status.isLeafPartition() && !status.willBeSplit();
    if (!cellApplies) {
        return null;
    }
    if (status.maySplitIfCompacted()) {
        return "yes";
    }
    return "no";
}

private static String splitValueString(PartitionStatus status) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import static sleeper.clients.util.console.ConsoleOutput.CLEAR_CONSOLE;

class PartitionsStatusReportScreenTest extends AdminClientMockStoreBase {

@Test
void shouldRunPartitionStatusReport() throws Exception {
// Given
Expand All @@ -62,7 +63,7 @@ void shouldRunPartitionStatusReport() throws Exception {
.endsWith(PROMPT_RETURN_TO_MAIN + CLEAR_CONSOLE + MAIN_SCREEN)
.contains("Partitions Status Report:")
.contains("There are 3 partitions (2 leaf partitions")
.contains("There are 0 leaf partitions that need splitting")
.contains("There are 0 leaf partitions that will be split")
.contains("Split threshold is 1000000000 records");
confirmAndVerifyNoMoreInteractions();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void shouldFindNoSplittingPartitionsWhenThresholdNotExceeded() throws StateStore
PartitionsStatus status = PartitionsStatus.from(tableProperties, store);

// Then
assertThat(status.getNumLeafPartitionsThatNeedSplitting()).isZero();
assertThat(status.getNumLeafPartitionsThatWillBeSplit()).isZero();
}

@Test
Expand All @@ -96,7 +96,7 @@ void shouldFindSplittingPartitionsWhenThresholdExceeded() throws StateStoreExcep
PartitionsStatus status = PartitionsStatus.from(tableProperties, store);

// Then
assertThat(status.getNumLeafPartitionsThatNeedSplitting()).isEqualTo(2);
assertThat(status.getNumLeafPartitionsThatWillBeSplit()).isEqualTo(2);
}

@Test
Expand All @@ -113,7 +113,7 @@ void shouldExcludeNonLeafPartitionsInNeedsSplittingCount() throws StateStoreExce
PartitionsStatus status = PartitionsStatus.from(tableProperties, store);

// Then
assertThat(status.getNumLeafPartitionsThatNeedSplitting()).isEqualTo(0);
assertThat(status.getNumLeafPartitionsThatWillBeSplit()).isEqualTo(0);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Partitions Status Report:
--------------------------
There are 0 partitions (0 leaf partitions)
There are 0 leaf partitions that need splitting
There are 0 leaf partitions that will be split
Split threshold is 10 records
---------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | NEEDS_SPLITTING | SPLIT_FIELD | SPLIT_VALUE |
---------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | WILL_BE_SPLIT | MAY_SPLIT_IF_COMPACTED | SPLIT_FIELD | SPLIT_VALUE |
--------------------------------------------------------------------------------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
Partitions Status Report:
--------------------------
There are 3 partitions (2 leaf partitions)
There are 0 leaf partitions that need splitting
There are 0 leaf partitions that will be split
Split threshold is 10 records
-----------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | NEEDS_SPLITTING | SPLIT_FIELD | SPLIT_VALUE |
| L | root | min | 0 | 0 | 0 | yes | no | | |
| R | root | max | 0 | 0 | 0 | yes | no | | |
| root | | | 1 | 100 | 100 | no | | key | abc |
-----------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | WILL_BE_SPLIT | MAY_SPLIT_IF_COMPACTED | SPLIT_FIELD | SPLIT_VALUE |
| L | root | min | 0 | 0 | 0 | yes | no | yes | | |
| R | root | max | 0 | 0 | 0 | yes | no | yes | | |
| root | | | 1 | 100 | 100 | no | | | key | abc |
----------------------------------------------------------------------------------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
Partitions Status Report:
--------------------------
There are 5 partitions (3 leaf partitions)
There are 0 leaf partitions that need splitting
There are 0 leaf partitions that will be split
Split threshold is 10 records
-------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | NEEDS_SPLITTING | SPLIT_FIELD | SPLIT_VALUE |
| A | parent | min | 1 | 5 | 5 | yes | no | | |
| C | B | min | 1 | 5 | 5 | yes | no | | |
| D | B | max | 1 | 5 | 5 | yes | no | | |
| B | parent | max | 0 | 0 | 0 | no | | another-key | aaa |
| parent | | | 0 | 0 | 0 | no | | first-key | 123 |
-------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | WILL_BE_SPLIT | MAY_SPLIT_IF_COMPACTED | SPLIT_FIELD | SPLIT_VALUE |
| A | parent | min | 1 | 5 | 5 | yes | no | no | | |
| C | B | min | 1 | 5 | 5 | yes | no | no | | |
| D | B | max | 1 | 5 | 5 | yes | no | no | | |
| B | parent | max | 0 | 0 | 0 | no | | | another-key | aaa |
| parent | | | 0 | 0 | 0 | no | | | first-key | 123 |
------------------------------------------------------------------------------------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
Partitions Status Report:
--------------------------
There are 1 partitions (1 leaf partitions)
There are 0 leaf partitions that need splitting
There are 0 leaf partitions that will be split
Split threshold is 10 records
-----------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | NEEDS_SPLITTING | SPLIT_FIELD | SPLIT_VALUE |
| root | | | 1 | 5 | 5 | yes | no | | |
-----------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | WILL_BE_SPLIT | MAY_SPLIT_IF_COMPACTED | SPLIT_FIELD | SPLIT_VALUE |
| root | | | 1 | 5 | 5 | yes | no | no | | |
----------------------------------------------------------------------------------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
Partitions Status Report:
--------------------------
There are 3 partitions (2 leaf partitions)
There are 0 leaf partitions that need splitting
There are 0 leaf partitions that will be split
Split threshold is 10 records
-------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | NEEDS_SPLITTING | SPLIT_FIELD | SPLIT_VALUE |
| A | parent | min | 1 | 5 | 5 | yes | no | | |
| B | parent | max | 1 | 5 | 5 | yes | no | | |
| parent | | | 0 | 0 | 0 | no | | key | aaa |
-------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------
| ID | PARENT | PARENT_SIDE | FILES | APPROX_RECORDS | KNOWN_RECORDS | LEAF | WILL_BE_SPLIT | MAY_SPLIT_IF_COMPACTED | SPLIT_FIELD | SPLIT_VALUE |
| A | parent | min | 1 | 5 | 5 | yes | no | no | | |
| B | parent | max | 1 | 5 | 5 | yes | no | no | | |
| parent | | | 0 | 0 | 0 | no | | | key | aaa |
------------------------------------------------------------------------------------------------------------------------------------------------------
Loading
Loading