Skip to content

Commit

Permalink
Implement merge/replace branch in BranchManager
Browse files Browse the repository at this point in the history
  • Loading branch information
sunxiaojian committed Apr 8, 2024
1 parent fd905dd commit edc7d44
Show file tree
Hide file tree
Showing 12 changed files with 370 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@

import static org.apache.paimon.deletionvectors.DeletionVectorsIndexFile.DELETION_VECTORS_INDEX;
import static org.apache.paimon.index.HashIndexFile.HASH_INDEX;
import static org.apache.paimon.utils.BranchManager.DEFAULT_MAIN_BRANCH;

/**
* Default implementation of {@link FileStoreCommit}.
Expand Down Expand Up @@ -751,10 +750,7 @@ public boolean tryCommitOnce(
@Nullable String newStatsFileName) {
long newSnapshotId =
latestSnapshot == null ? Snapshot.FIRST_SNAPSHOT_ID : latestSnapshot.id() + 1;
Path newSnapshotPath =
branchName.equals(DEFAULT_MAIN_BRANCH)
? snapshotManager.snapshotPath(newSnapshotId)
: snapshotManager.branchSnapshotPath(branchName, newSnapshotId);
Path newSnapshotPath = snapshotManager.snapshotPath(branchName, newSnapshotId);

if (LOG.isDebugEnabled()) {
LOG.debug("Ready to commit table files to snapshot #" + newSnapshotId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,16 @@ public List<TableSchema> listAll() {
return listAllIds().stream().map(this::schema).collect(Collectors.toList());
}

/** Lists the IDs of all schema files found in the schema directory of the given branch. */
public List<Long> listAllIdsWithBranch(String branchName) {
    Path branchSchemaDir = branchSchemaDirectory(branchName);
    try {
        // Only files carrying the schema prefix are considered.
        return listVersionedFiles(fileIO, branchSchemaDir, SCHEMA_PREFIX)
                .collect(Collectors.toList());
    } catch (IOException ioFailure) {
        throw new UncheckedIOException(ioFailure);
    }
}

/** List all schema IDs. */
public List<Long> listAllIds() {
try {
Expand Down Expand Up @@ -497,22 +507,25 @@ public static TableSchema fromPath(FileIO fileIO, Path path) {
}
}

/**
 * Returns the schema directory of the main branch.
 *
 * <p>The removed version returned the fixed location {@code <tableRoot>/schema}; the added
 * version is public and delegates to {@code branchSchemaDirectory(DEFAULT_MAIN_BRANCH)}.
 */
private Path schemaDirectory() {
return new Path(tableRoot + "/schema");
public Path schemaDirectory() {
return branchSchemaDirectory(DEFAULT_MAIN_BRANCH);
}

/**
 * Returns the path of the schema file with the given id on the main branch.
 *
 * <p>The removed version built {@code <tableRoot>/schema/<SCHEMA_PREFIX><id>} directly; the
 * added version delegates to {@code branchSchemaPath(DEFAULT_MAIN_BRANCH, id)}.
 */
@VisibleForTesting
public Path toSchemaPath(long id) {
return new Path(tableRoot + "/schema/" + SCHEMA_PREFIX + id);
return branchSchemaPath(DEFAULT_MAIN_BRANCH, id);
}

/**
 * Returns the schema directory of the given branch: the branch path produced by {@code
 * getBranchPath} with {@code /schema} appended.
 *
 * <p>The added version passes {@code fileIO} to {@code getBranchPath} in addition to the table
 * root and branch name.
 */
public Path branchSchemaDirectory(String branchName) {
return new Path(getBranchPath(tableRoot, branchName) + "/schema");
return new Path(getBranchPath(fileIO, tableRoot, branchName) + "/schema");
}

/**
 * Returns the path of the schema file {@code schemaId} of the given branch: the branch path
 * produced by {@code getBranchPath}, followed by {@code /schema/}, {@code SCHEMA_PREFIX} and the
 * schema id.
 *
 * <p>The added version passes {@code fileIO} to {@code getBranchPath} in addition to the table
 * root and branch name.
 */
public Path branchSchemaPath(String branchName, long schemaId) {
return new Path(
getBranchPath(tableRoot, branchName) + "/schema/" + SCHEMA_PREFIX + schemaId);
getBranchPath(fileIO, tableRoot, branchName)
+ "/schema/"
+ SCHEMA_PREFIX
+ schemaId);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,11 @@ public void deleteBranch(String branchName) {
branchManager().deleteBranch(branchName);
}

@Override
public void replaceBranch(String fromBranch) {
// Pure delegation: all validation and file handling is done by the branch manager.
branchManager().replaceBranch(fromBranch);
}

@Override
public void rollbackTo(String tagName) {
TagManager tagManager = tagManager();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ default void deleteBranch(String branchName) {
this.getClass().getSimpleName()));
}

@Override
default void replaceBranch(String fromBranch) {
    // Read-only tables reject the operation and report their concrete class name.
    String tableType = this.getClass().getSimpleName();
    String message =
            String.format("Readonly Table %s does not support replaceBranch.", tableType);
    throw new UnsupportedOperationException(message);
}

@Override
default ExpireSnapshots newExpireSnapshots() {
throw new UnsupportedOperationException(
Expand Down
3 changes: 3 additions & 0 deletions paimon-core/src/main/java/org/apache/paimon/table/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ public interface Table extends Serializable {
@Experimental
void deleteBranch(String branchName);

/** Replace the main branch with the branch named {@code fromBranch}. */
@Experimental
void replaceBranch(String fromBranch);

/** Manually expire snapshots, parameters can be controlled independently of table options. */
@Experimental
ExpireSnapshots newExpireSnapshots();
Expand Down
132 changes: 125 additions & 7 deletions paimon-core/src/main/java/org/apache/paimon/utils/BranchManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;

Expand All @@ -32,8 +33,11 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.SortedMap;
import java.util.stream.Collectors;

Expand All @@ -47,6 +51,7 @@ public class BranchManager {

public static final String BRANCH_PREFIX = "branch-";
public static final String DEFAULT_MAIN_BRANCH = "main";
public static final String MAIN_BRANCH_FILE = "MAIN-BRANCH";

private final FileIO fileIO;
private final Path tablePath;
Expand All @@ -67,19 +72,42 @@ public BranchManager(
this.schemaManager = schemaManager;
}

/**
 * Records the given branch as the main branch by overwriting the {@code MAIN-BRANCH} file under
 * the table path with the branch name.
 *
 * @param branchName name of the branch to record
 * @throws IOException if the file cannot be written
 */
public void commitMainBranch(String branchName) throws IOException {
    fileIO.overwriteFileUtf8(new Path(tablePath, MAIN_BRANCH_FILE), branchName);
}

/** Returns the directory under the table path that holds all branch directories. */
public Path branchDirectory() {
    String branchRoot = tablePath + "/branch";
    return new Path(branchRoot);
}

/** Return the path string of a branch. */
public static String getBranchPath(Path tablePath, String branchName) {
public static String getBranchPath(FileIO fileIO, Path tablePath, String branchName) {
    // Any branch other than main always lives under <table>/branch/branch-<name>.
    if (!branchName.equals(DEFAULT_MAIN_BRANCH)) {
        return tablePath.toString() + "/branch/" + BRANCH_PREFIX + branchName;
    }

    // The main branch may be redirected to another branch through the MAIN-BRANCH file.
    Path mainBranchFile = new Path(tablePath, MAIN_BRANCH_FILE);
    try {
        if (!fileIO.exists(mainBranchFile)) {
            // No redirect file: main is the table root itself.
            return tablePath.toString();
        }
        String redirectedBranch = fileIO.readFileUtf8(mainBranchFile);
        // A blank file is treated the same as a missing one.
        return StringUtils.isBlank(redirectedBranch)
                ? tablePath.toString()
                : tablePath.toString() + "/branch/" + BRANCH_PREFIX + redirectedBranch;
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}

/**
 * Return the path of a branch.
 *
 * <p>Wraps the string produced by {@code getBranchPath} in a {@link Path}; the added version
 * passes {@code fileIO} to {@code getBranchPath} as well.
 */
public Path branchPath(String branchName) {
return new Path(getBranchPath(tablePath, branchName));
return new Path(getBranchPath(fileIO, tablePath, branchName));
}

public void createBranch(String branchName, String tagName) {
Expand All @@ -101,18 +129,18 @@ public void createBranch(String branchName, String tagName) {
try {
// Copy the corresponding tag, snapshot and schema files into the branch directory
fileIO.copyFileUtf8(
tagManager.tagPath(tagName), tagManager.branchTagPath(branchName, tagName));
tagManager.tagPath(tagName), tagManager.tagPath(branchName, tagName));
fileIO.copyFileUtf8(
snapshotManager.snapshotPath(snapshot.id()),
snapshotManager.branchSnapshotPath(branchName, snapshot.id()));
snapshotManager.snapshotPath(branchName, snapshot.id()));
fileIO.copyFileUtf8(
schemaManager.toSchemaPath(snapshot.schemaId()),
schemaManager.branchSchemaPath(branchName, snapshot.schemaId()));
} catch (IOException e) {
throw new RuntimeException(
String.format(
"Exception occurs when create branch '%s' (directory in %s).",
branchName, getBranchPath(tablePath, branchName)),
branchName, getBranchPath(fileIO, tablePath, branchName)),
e);
}
}
Expand All @@ -126,11 +154,101 @@ public void deleteBranch(String branchName) {
LOG.info(
String.format(
"Deleting the branch failed due to an exception in deleting the directory %s. Please try again.",
getBranchPath(tablePath, branchName)),
getBranchPath(fileIO, tablePath, branchName)),
e);
}
}

/**
 * Replaces the main branch with the given branch.
 *
 * <p>Files of the current main branch that the target branch is missing are copied into it, the
 * {@code MAIN-BRANCH} file is then rewritten to name the target branch, and finally the tag,
 * snapshot and schema directories of the previous main branch are deleted.
 *
 * @param branchName name of an existing branch that becomes the new main branch
 * @throws IllegalArgumentException if the name is blank or the branch does not exist
 *     (presumably raised by {@code checkArgument} - verify against its implementation)
 * @throws RuntimeException wrapping the {@link IOException} if any file operation fails
 */
public void replaceBranch(String branchName) {
    checkArgument(!StringUtils.isBlank(branchName), "Branch name '%s' is blank.", branchName);
    checkArgument(branchExists(branchName), "Branch name '%s' not exists.", branchName);
    try {
        // 0. Resolve the previous main directories up front. The schema directory of main is
        // resolved through the MAIN-BRANCH file, which step 2 rewrites (the tag and snapshot
        // directories are presumably resolved the same way), so they must be captured first.
        Path tagDirectory = tagManager.tagDirectory();
        Path snapshotDirectory = snapshotManager.snapshotDirectory();
        Path schemaDirectory = schemaManager.schemaDirectory();
        // 1. Copy the snapshots, tags and schemas of the main branch that the target branch
        // does not have yet.
        calculateCopyMainToBranch(branchName);
        // 2. Point the MAIN-BRANCH file at the target branch.
        commitMainBranch(branchName);
        // 3. Drop the previous main branch, including snapshots, tags and schemas.
        dropPreviousMainBranch(tagDirectory, snapshotDirectory, schemaDirectory);
    } catch (IOException e) {
        // Keep the cause and say which branch was involved, as createBranch does.
        throw new RuntimeException(
                String.format(
                        "Exception occurs when replacing the main branch with branch '%s'.",
                        branchName),
                e);
    }
}

/**
 * Copies tag, snapshot and schema files from the main branch into the target branch.
 *
 * <p>Only files the target branch does not already contain are copied, and only those that are
 * strictly older than the point the branch was created from: tags and snapshots whose snapshot
 * id is below the id of the branch's source snapshot, and schemas whose id is below that
 * snapshot's schema id.
 *
 * @param branchName name of the target branch
 * @throws IOException if copying a file fails
 * @throws RuntimeException if no branch with the given name is listed
 */
private void calculateCopyMainToBranch(String branchName) throws IOException {
    TableBranch fromBranch =
            this.branches().stream()
                    .filter(branch -> branch.getBranchName().equals(branchName))
                    .findFirst()
                    .orElseThrow(
                            () ->
                                    new RuntimeException(
                                            String.format(
                                                    "No branches found %s", branchName)));
    Snapshot fromSnapshot = snapshotManager.snapshot(fromBranch.getCreatedFromSnapshot());

    // Copy tags.
    List<String> tags = tagManager.allTagNames();
    for (String tagName : tags) {
        if (tagManager.tagExists(branchName, tagName)) {
            // If it already exists, skip it directly.
            continue;
        }
        Snapshot snapshot = tagManager.taggedSnapshot(tagName);
        if (snapshot.id() < fromSnapshot.id()) {
            fileIO.copyFileUtf8(
                    tagManager.tagPath(tagName), tagManager.tagPath(branchName, tagName));
        }
    }

    // Copy snapshots.
    Iterator<Snapshot> snapshots = snapshotManager.snapshots();
    while (snapshots.hasNext()) {
        Snapshot snapshot = snapshots.next();
        if (snapshot.id() >= fromSnapshot.id()) {
            // Not older than the branch point: never copied, so skip the existence lookup.
            continue;
        }
        if (snapshotManager.snapshotExists(branchName, snapshot.id())) {
            // If it already exists, skip it directly.
            continue;
        }
        fileIO.copyFileUtf8(
                snapshotManager.snapshotPath(snapshot.id()),
                snapshotManager.snapshotPath(branchName, snapshot.id()));
    }

    // Copy schemas.
    List<Long> schemaIds = schemaManager.listAllIds();
    Set<Long> existsSchemas = new HashSet<>(schemaManager.listAllIdsWithBranch(branchName));
    for (Long schemaId : schemaIds) {
        if (existsSchemas.contains(schemaId)) {
            // If it already exists, skip it directly (before paying for the schema read).
            continue;
        }
        TableSchema tableSchema = schemaManager.schema(schemaId);
        if (tableSchema.id() < fromSnapshot.schemaId()) {
            fileIO.copyFileUtf8(
                    schemaManager.toSchemaPath(schemaId),
                    schemaManager.branchSchemaPath(branchName, schemaId));
        }
    }
}

/**
 * Recursively deletes the tag, snapshot and schema directories of the previous main branch, in
 * that order.
 *
 * @throws IOException if a deletion fails; directories later in the order are then left as is
 */
private void dropPreviousMainBranch(
        Path tagDirectory, Path snapshotDirectory, Path schemaDirectory) throws IOException {
    Path[] obsoleteDirectories = {tagDirectory, snapshotDirectory, schemaDirectory};
    for (Path obsoleteDirectory : obsoleteDirectories) {
        fileIO.delete(obsoleteDirectory, true);
    }
}

/** Check if path exists. */
public boolean fileExists(Path path) {
try {
Expand Down Expand Up @@ -172,7 +290,7 @@ public List<TableBranch> branches() {
String branchName = path.getLeft().getName().substring(BRANCH_PREFIX.length());
FileStoreTable branchTable =
FileStoreTableFactory.create(
fileIO, new Path(getBranchPath(tablePath, branchName)));
fileIO, new Path(getBranchPath(fileIO, tablePath, branchName)));
SortedMap<Snapshot, List<String>> snapshotTags = branchTable.tagManager().tags();
checkArgument(!snapshotTags.isEmpty());
Snapshot snapshot = snapshotTags.firstKey();
Expand Down
Loading

0 comments on commit edc7d44

Please sign in to comment.