Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/UUID clash detector #1628

Draft
wants to merge 19 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
package org.apache.jackrabbit.oak.upgrade;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.jackrabbit.oak.commons.sort.ExternalSort;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class UUIDConflictDetector {

private static final Logger log = LoggerFactory.getLogger(UUIDConflictDetector.class);
private final File dir;
private final NodeStore sourceStore;
private final NodeStore targetStore;
private long timeStamp;

public UUIDConflictDetector(NodeStore sourceStore, NodeStore targetStore, File dir) {
this.sourceStore = sourceStore;
this.targetStore = targetStore;
this.dir = dir;
this.timeStamp = 0;
}

// for testing purposes only, not for production usage
public UUIDConflictDetector(NodeStore sourceStore, NodeStore targetStore, File dir, long timeStamp) {
this(sourceStore, targetStore, dir);
this.timeStamp = timeStamp;
}


private void detectConflicts() throws IOException {
File sourceFile = gatherUUIDs(sourceStore.getRoot(), "source");
File targetFile = gatherUUIDs(targetStore.getRoot(), "target");

compareUUIDs(sourceFile, targetFile);
}

public void detectConflicts(String[] includePath) throws IOException {
long startTime = System.currentTimeMillis();
log.info("started detecting uuid conflicts at: {}", startTime);
Set<String> includePaths = dedupePaths(includePath);
if (CollectionUtils.isEmpty(includePaths)) {
log.info("include paths not provided, iterating entire repository to detect conflicts");
detectConflicts();
log.info("uuid conflict detection completed in: {} ms", System.currentTimeMillis() - startTime);
return;
}

File sourceFile = getSourceFileForPaths(includePaths);
File targetFile = gatherUUIDs(targetStore.getRoot(), "target");

compareUUIDs(sourceFile, targetFile);
log.info("uuid conflict detection completed in: {} ms", System.currentTimeMillis() - startTime);
}

private File getSourceFileForPaths(Set<String> includePaths) throws IOException {
long startTime = System.currentTimeMillis();
log.info("starting fetching uuid nodes from source repository at: {}", startTime);
File sourceFile = new File(dir, "source_uuids_" + getTimeStamp() + ".txt");
log.info("source file: {}", sourceFile.getName());
sourceFile.deleteOnExit();
try (BufferedWriter writer = Files.newBufferedWriter(sourceFile.toPath())) {
for (String path : includePaths) {
NodeState state = getNodeAtPath(sourceStore.getRoot(), path);
gatherUUIDs(state, path, writer);
}
}
log.info("fetching uuid nodes completed from source repository in: {} ms", System.currentTimeMillis() - startTime);
return sortFile(sourceFile, "source");
}

private NodeState getNodeAtPath(NodeState node, String path) {
for (String name : path.substring(1).split("/")) {
node = node.getChildNode(name);
}
return node;
}

/* remove the child paths from includePaths if parent path is provided.
* For example,
* includePaths = ["/content/foo/a, /content/foo/b, /content/foo, /content/bar"] will be reduced to
* ["/content/foo, /content/bar"]
* */
private Set<String> dedupePaths(String[] includePaths) {
if (includePaths == null || includePaths.length == 0) {
return Collections.emptySet();
}

Set<String> uniqueIncludePaths = Arrays.stream(includePaths).filter(StringUtils::isNotBlank)
.collect(Collectors.toSet());
Set<String> dedupePaths = new HashSet<>();

// remove child path if parent path is present
for (String currentPath : uniqueIncludePaths) {
String parentPath = currentPath.substring(0, currentPath.lastIndexOf('/'));
if (uniqueIncludePaths.contains(parentPath)) {
dedupePaths.add(parentPath);
} else {
dedupePaths.add(currentPath);
}
}

return dedupePaths;
}


private File gatherUUIDs(NodeState state, String prefix) throws IOException {
long startTime = System.currentTimeMillis();
log.info("starting fetching uuid nodes from {} repository at: {}", prefix, startTime);
File file = new File(dir, prefix + "_uuids_" + getTimeStamp() + ".txt");
log.info("{} uuid file: {}", prefix, file.getName());
file.deleteOnExit();
try (BufferedWriter writer = Files.newBufferedWriter(file.toPath())) {
gatherUUIDs(state, "", writer);
}
log.info("fetching uuid nodes completed from {} repository in: {} ms", prefix, System.currentTimeMillis() - startTime);
return sortFile(file, prefix);
}

private File sortFile(File file, String prefix) throws IOException {
long startTime = System.currentTimeMillis();
log.info("sorting {} started at: {}", file.getName(), startTime);
List<File> sortedFiles = ExternalSort.sortInBatch(file, Comparator.naturalOrder());
log.info("sorting {} file completed in: {} ms", file.getName(), System.currentTimeMillis() - startTime);
File sortedFile = new File(dir, prefix + "_uuids_" + getTimeStamp() + ".txt");
log.info("sorted {} uuid file: {}", prefix, sortedFile.getName());

long mergeFileStartTime = System.currentTimeMillis();
log.info("merging sorted {} files started at: {} ms", prefix, mergeFileStartTime);
// Merge the sorted temporary files into the sortedFile
ExternalSort.mergeSortedFiles(sortedFiles, sortedFile);

log.info("merging sorted {} files completed in: {} ms", prefix, System.currentTimeMillis() - mergeFileStartTime);
return sortedFile;
}

private void gatherUUIDs(NodeState state, String path, BufferedWriter writer) throws IOException {
if (state.hasProperty("jcr:uuid")) {
String uuid = state.getString("jcr:uuid");
writer.write(uuid + " -> " + (StringUtils.isBlank(path) ? "/" : path));
writer.newLine();
}

for (ChildNodeEntry child : state.getChildNodeEntries()) {
gatherUUIDs(child.getNodeState(), path + "/" + child.getName(), writer);
}
}

private void compareUUIDs(File sourceFile, File targetFile) throws IOException {
long startTime = System.currentTimeMillis();
log.info("started uuid conflict comparison in {}, {} files at: {}", sourceFile.getName(), targetFile.getName(), startTime);
Path uuidConflictFilePath = Paths.get(dir.getAbsolutePath(), "uuid_conflicts_" + getTimeStamp() + ".txt");
log.info("uuid conflict file: {}", uuidConflictFilePath.getFileName());
try (BufferedReader sourceReader = Files.newBufferedReader(sourceFile.toPath());
BufferedReader targetReader = Files.newBufferedReader(targetFile.toPath());
BufferedWriter conflictWriter = Files.newBufferedWriter(uuidConflictFilePath)) {

String sourceLine = sourceReader.readLine();
String targetLine = targetReader.readLine();

while (sourceLine != null && targetLine != null) {
String[] sourceLineSplit = sourceLine.split(" -> ");
String[] targetLineSplit = targetLine.split(" -> ");
String sourceUUID = sourceLineSplit[0];
String sourcePath = sourceLineSplit[1];
String targetUUID = targetLineSplit[0];
String targetPath = targetLineSplit[1];

int comparison = sourceUUID.compareTo(targetUUID);
if (comparison < 0) {
sourceLine = sourceReader.readLine();
} else if (comparison > 0) {
targetLine = targetReader.readLine();
} else {
if (!StringUtils.equals(sourcePath, targetPath)) {
log.info("conflict found for uuid: {}, source path: {}, target path: {}", sourceUUID, sourcePath, targetPath);
String uuidWithPaths = sourceUUID + ": " + sourcePath + " " + targetPath;
conflictWriter.write(uuidWithPaths);
conflictWriter.newLine();
}
sourceLine = sourceReader.readLine();
targetLine = targetReader.readLine();
}
}
}
log.info("uuid conflict comparison in {}, {} files completed in: {} ms", sourceFile.getName(), targetFile.getName(), System.currentTimeMillis() - startTime);
}

public long getTimeStamp() {
return timeStamp == 0L ? Instant.now().toEpochMilli() : timeStamp;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,33 @@
*/
package org.apache.jackrabbit.oak.upgrade.cli;

import java.io.IOException;
import java.util.List;
import java.util.ServiceLoader;

import javax.jcr.RepositoryException;

import joptsimple.OptionSet;
import org.apache.jackrabbit.guava.common.collect.Lists;
import org.apache.jackrabbit.guava.common.io.Closer;

import joptsimple.OptionSet;
import org.apache.jackrabbit.oak.spi.blob.BlobStore;
import org.apache.jackrabbit.oak.spi.lifecycle.CompositeInitializer;
import org.apache.jackrabbit.oak.spi.lifecycle.RepositoryInitializer;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.upgrade.UUIDConflictDetector;
import org.apache.jackrabbit.oak.upgrade.cli.parser.CliArgumentException;
import org.apache.jackrabbit.oak.upgrade.cli.parser.DatastoreArguments;
import org.apache.jackrabbit.oak.upgrade.cli.parser.MigrationCliArguments;
import org.apache.jackrabbit.oak.upgrade.cli.parser.MigrationOptions;
import org.apache.jackrabbit.oak.upgrade.cli.parser.OptionParserFactory;
import org.apache.jackrabbit.oak.upgrade.cli.parser.StoreArguments;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.jcr.RepositoryException;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.ServiceLoader;

public class OakUpgrade {

private static final Logger log = LoggerFactory.getLogger(OakUpgrade.class);

public static void main(String... args) throws IOException {
OptionSet options = OptionParserFactory.create().parse(args);
try {
Expand All @@ -45,6 +51,12 @@ public static void main(String... args) throws IOException {
CliUtils.displayUsage();
return;
}

if (cliArguments.hasOption(OptionParserFactory.CHECK_UUID_CONFLICT)) {
checkUUIDConflicts(cliArguments);
return;
}

migrate(cliArguments);
} catch(CliArgumentException e) {
if (e.getMessage() != null) {
Expand Down Expand Up @@ -98,4 +110,29 @@ private static RepositoryInitializer createCompositeInitializer() {
return new CompositeInitializer(initializers);
}

private static void checkUUIDConflicts(MigrationCliArguments argumentParser) throws CliArgumentException, IOException {
MigrationOptions migrationOptions = new MigrationOptions(argumentParser);
migrationOptions.logOptions();

StoreArguments stores = new StoreArguments(migrationOptions, argumentParser.getArguments());
stores.logOptions();

boolean srcEmbedded = stores.srcUsesEmbeddedDatastore();
DatastoreArguments datastores = new DatastoreArguments(migrationOptions, stores, srcEmbedded);

try (Closer closer = Closer.create()) {
CliUtils.handleSigInt(closer);
BlobStore srcBlobStore = datastores.getSrcBlobStore().create(closer);
NodeStore sourceNodeStore = stores.getSrcStore().create(srcBlobStore, closer);

BlobStore dstBlobStore = datastores.getDstBlobStore(srcBlobStore).create(closer);
NodeStore targetNodeStore = stores.getDstStore().create(dstBlobStore, closer);

UUIDConflictDetector uuidConflictDetector = new UUIDConflictDetector(sourceNodeStore, targetNodeStore, new File("/tmp"));
uuidConflictDetector.detectConflicts(migrationOptions.getIncludePaths());
} catch (Exception e) {
log.error("Error while checking for uuid clashes: ", e);
throw e;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ public class OptionParserFactory {
public static final String FORCE_CHECKPOINTS = "force-checkpoints";

public static final String ADD_SECONDARY_METADATA = "add-secondary-metadata";
public static final String CHECK_UUID_CONFLICT = "check-uuid-conflicts";

public static OptionParser create() {
OptionParser op = new OptionParser();
Expand Down Expand Up @@ -174,5 +175,6 @@ private static void addMiscOptions(OptionParser op) {
op.accepts(SKIP_CHECKPOINTS, "Don't copy checkpoints on the full segment->segment migration");
op.accepts(FORCE_CHECKPOINTS, "Copy checkpoints even if the --include,exclude,merge-paths option is specified");
op.accepts(ADD_SECONDARY_METADATA, "Adds the metadata required by secondary store");
op.accepts(CHECK_UUID_CONFLICT, "Checks conflicts for UUID at source and target repositories");
}
}
22 changes: 22 additions & 0 deletions oak-upgrade/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,29 @@
</encoder>
</appender>

<appender name="FILE" class="ch.qos.logback.core.FileAppender">
<file>/tmp/oak-upgrade.log</file>
<append>true</append>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>

<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>/tmp/logs/oak-upgrade.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>/tmp/logs/oak-upgrade.log.%d{yyyy-MM-dd}.log</fileNamePattern>
<!-- keep 30 days' worth of history -->
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss} %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>

<root level="info">
<appender-ref ref="CONSOLE" />
<appender-ref ref="FILE" />
</root>
</configuration>
Loading