Skip to content

Commit

Permalink
Merge pull request #197 from jpedroh/feat-files-comparison
Browse files Browse the repository at this point in the history
feat: Add more Data Collectors for Generic Merge study
  • Loading branch information
pauloborba authored Aug 8, 2024
2 parents 82ddfda + 05744b1 commit 1e00670
Show file tree
Hide file tree
Showing 10 changed files with 284 additions and 3 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ dependencies {
testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.3.1'
implementation 'org.apache.logging.log4j:log4j-api:2.23.1'
implementation 'org.apache.logging.log4j:log4j-core:2.23.1'
implementation 'org.json:json:20210307'
}
8 changes: 8 additions & 0 deletions src/main/injectors/GenericMergeModule.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import org.apache.logging.log4j.Logger
import services.commitFilters.MutuallyModifiedFilesCommitFilter
import services.dataCollectors.GenericMerge.GenericMergeConfig
import services.dataCollectors.GenericMerge.GenericMergeDataCollector
import services.dataCollectors.GenericMerge.MergeConflictsComparator
import services.dataCollectors.GenericMerge.MergeToolsComparator
import services.dataCollectors.GenericMerge.ScenarioLOCsCounter
import services.dataCollectors.GenericMerge.UnstructuredMergeCollector
import services.outputProcessors.GenericMergeDataOutputProcessor
import services.projectProcessors.DummyProjectProcessor
import services.util.ci.CIPlatform
Expand All @@ -28,7 +32,11 @@ class GenericMergeModule extends AbstractModule {
projectProcessorBinder.addBinding().to(DummyProjectProcessor.class)

Multibinder<DataCollector> dataCollectorBinder = Multibinder.newSetBinder(binder(), DataCollector.class)
dataCollectorBinder.addBinding().to(ScenarioLOCsCounter.class)
dataCollectorBinder.addBinding().to(GenericMergeDataCollector.class)
dataCollectorBinder.addBinding().to(MergeToolsComparator.class)
dataCollectorBinder.addBinding().to(MergeConflictsComparator.class)
dataCollectorBinder.addBinding().to(UnstructuredMergeCollector.class)

Multibinder<OutputProcessor> outputProcessorBinder = Multibinder.newSetBinder(binder(), OutputProcessor.class)
outputProcessorBinder.addBinding().to(GenericMergeDataOutputProcessor.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ class GenericMergeConfig {

public static final GENERIC_MERGE_REPORT_PATH = "${BASE_EXPERIMENT_PATH}/output/reports"
public static final GENERIC_MERGE_REPORT_FILE_NAME = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-execution.csv"
public static final GENERIC_MERGE_REPORT_FILES_EQUIVALENT = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-output-equivalent.csv"
public static final GENERIC_MERGE_REPORT_MERGE_CONFLICTS = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-merge-conflicts.csv"
public static final GENERIC_MERGE_REPORT_COMMITS_FILE_NAME = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-execution-commits.csv"
public static final GENERIC_MERGE_REPORT_SCENARIO_LOCS_FILE_NAME = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-scenario-locs.csv"
public static final GENERIC_MERGE_REPORT_UNSTRUCTURED_TIMES_FILE_NAME = "${GENERIC_MERGE_REPORT_PATH}/generic-merge-unstructured-times.csv"

public static final String GENERIC_MERGE_BINARY_PATH = "${BASE_EXPERIMENT_PATH}/tools/generic-merge"
public static final String JDIME_BINARY_PATH = "${BASE_EXPERIMENT_PATH}/tools/jdime/install/JDime/bin"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package services.dataCollectors.GenericMerge


import interfaces.DataCollector
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger
import project.MergeCommit
import project.Project
import services.dataCollectors.S3MMergesCollector.MergeScenarioCollector
import services.mergeScenariosFilters.NonFastForwardMergeScenarioFilter
import services.util.MergeConflict
import util.CsvUtils

import java.nio.file.Files
import java.nio.file.Path
import java.util.stream.Stream

/**
 * Data collector that compares, conflict by conflict, the files named
 * {@code merge.generic_merge.java} and {@code merge.jdime.java} found in each merge scenario.
 *
 * Only non-fast-forward scenarios in which both files exist and both contain at least one
 * conflict are considered. Every conflict of the first file is paired with every conflict of
 * the second one, and each pair becomes one CSV row appended to
 * {@code GenericMergeConfig.GENERIC_MERGE_REPORT_MERGE_CONFLICTS}.
 */
class MergeConflictsComparator implements DataCollector {
    private static Logger LOG = LogManager.getLogger(MergeConflictsComparator.class)

    private static final String GENERIC_MERGE_FILE_NAME = "merge.generic_merge.java"
    private static final String JDIME_FILE_NAME = "merge.jdime.java"

    /**
     * Runs the comparison for every scenario of the given merge commit and appends the
     * resulting rows to the report file. Nothing is written when no row was produced.
     *
     * @param project project that owns the merge commit
     * @param mergeCommit merge commit whose scenarios are inspected
     */
    @Override
    void collectData(Project project, MergeCommit mergeCommit) {
        LOG.trace("Starting execution of Merge Conflicts Comparator on project ${project.getName()} and merge commit ${mergeCommit.getSHA()}")

        def pairwiseComparison = compareMergeConflicts(project, mergeCommit)
        def csvRows = MergeScenarioCollector.collectMergeScenarios(project, mergeCommit)
                .parallelStream()
                .filter(NonFastForwardMergeScenarioFilter::isNonFastForwardMergeScenario)
                .filter(MergeConflictsComparator::hasResponseFromBothTools)
                .filter(MergeConflictsComparator::hasConflictsInBothTools)
                .map(MergeConflictsComparator::extractConflictsFromFiles)
                .flatMap(pairwiseComparison)
                .map(CsvUtils::toCsvRepresentation)
                .collect(CsvUtils.asLines())

        // An empty string is also blank, so a single check covers both cases.
        if (csvRows.isBlank()) {
            LOG.trace("Finished execution of Merge Conflicts Comparator on project ${project.getName()} and merge commit ${mergeCommit.getSHA()} without conflicts")
            return
        }

        new File(GenericMergeConfig.GENERIC_MERGE_REPORT_MERGE_CONFLICTS) << csvRows << "\n"

        LOG.trace("Finished execution of Merge Conflicts Comparator on project ${project.getName()} and merge commit ${mergeCommit.getSHA()}")
    }

    /**
     * @param scenario directory of a merge scenario
     * @return true when both output files exist inside the scenario directory
     */
    private static boolean hasResponseFromBothTools(Path scenario) {
        LOG.trace("Checking if has response from both tools for ${scenario.toString()}")
        if (!Files.exists(scenario.resolve(GENERIC_MERGE_FILE_NAME))) {
            return false
        }
        return Files.exists(scenario.resolve(JDIME_FILE_NAME))
    }

    /**
     * @param scenario directory of a merge scenario
     * @return true when both output files report more than zero conflicts
     */
    private static boolean hasConflictsInBothTools(Path scenario) {
        LOG.trace("Checking if both files have conflicts")
        // The second file is only inspected when the first one has conflicts.
        if (MergeConflict.getConflictsNumber(scenario.resolve(GENERIC_MERGE_FILE_NAME)) <= 0) {
            return false
        }
        return MergeConflict.getConflictsNumber(scenario.resolve(JDIME_FILE_NAME)) > 0
    }

    /**
     * @param scenario directory of a merge scenario
     * @return the scenario together with the conflicts extracted from each output file
     *         (first the generic merge file, then the jDime file)
     */
    private static Tuple3<Path, Set<MergeConflict>, Set<MergeConflict>> extractConflictsFromFiles(Path scenario) {
        LOG.trace("Extracting conflicts from files in ${scenario.toString()}")
        def fromGenericMerge = MergeConflict.extractMergeConflicts(scenario.resolve(GENERIC_MERGE_FILE_NAME))
        def fromJDime = MergeConflict.extractMergeConflicts(scenario.resolve(JDIME_FILE_NAME))
        return new Tuple3(scenario, fromGenericMerge, fromJDime)
    }

    /**
     * Builds the function that expands one scenario into a row per (generic merge conflict,
     * jDime conflict) pair.
     *
     * Each row holds: project name, merge commit SHA, scenario path, a label for each conflict
     * (based on its index) and whether the two conflicts are equal according to {@code equals}.
     *
     * @param project project written into every row
     * @param mergeCommit merge commit written into every row
     * @return closure mapping the extracted conflicts of a scenario to a stream of rows
     */
    private static Closure<Stream<List<String>>> compareMergeConflicts(Project project, MergeCommit mergeCommit) {
        return (Tuple3<Path, Set<MergeConflict>, Set<MergeConflict>> mergeConflicts) -> {
            def (scenario, genericMergeConflicts, jDimeConflicts) = mergeConflicts

            return genericMergeConflicts.withIndex().parallelStream().flatMap(indexedGenericMerge -> {
                def (genericMergeConflict, i) = indexedGenericMerge

                return jDimeConflicts.withIndex().parallelStream().map(indexedJDime -> {
                    def (jDimeConflict, j) = indexedJDime

                    LOG.trace("Checking if conflicts generic_merge_conflict_${i} and jdime_conflict_${j} are equal")

                    return [project.getName(),
                            mergeCommit.getSHA(),
                            scenario.toString(),
                            "generic_merge_conflict_${i}",
                            "jdime_conflict_${j}",
                            genericMergeConflict.equals(jDimeConflict).toString()]
                })
            })
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package services.dataCollectors.GenericMerge

import interfaces.DataCollector
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger
import project.MergeCommit
import project.Project
import services.dataCollectors.S3MMergesCollector.MergeScenarioCollector
import services.mergeScenariosFilters.NonFastForwardMergeScenarioFilter
import util.CsvUtils

import java.nio.file.Path

/**
 * Data collector that reports, for every non-fast-forward merge scenario, whether the files
 * {@code merge.generic_merge.java} and {@code merge.jdime.java} are syntactically equivalent
 * according to {@code FileSyntacticDiff}.
 *
 * One CSV row per scenario is appended to
 * {@code GenericMergeConfig.GENERIC_MERGE_REPORT_FILES_EQUIVALENT}.
 */
class MergeToolsComparator implements DataCollector {
    private static Logger LOG = LogManager.getLogger(MergeToolsComparator.class)

    /**
     * Compares the two output files of each scenario of the merge commit and appends the rows,
     * followed by a line break, to the report file.
     *
     * @param project project that owns the merge commit
     * @param mergeCommit merge commit whose scenarios are inspected
     */
    @Override
    void collectData(Project project, MergeCommit mergeCommit) {
        LOG.trace("Starting execution of Merge Tools Comparator on project ${project.getName()} and merge commit ${mergeCommit.getSHA()}")

        def equivalenceRows = MergeScenarioCollector.collectMergeScenarios(project, mergeCommit)
                .parallelStream()
                .filter(NonFastForwardMergeScenarioFilter::isNonFastForwardMergeScenario)
                .map(scenario -> CsvUtils.toCsvRepresentation(checkIfOutputsAreEquivalent(project, mergeCommit, scenario)))
                .collect(CsvUtils.asLines())

        def reportFile = new File(GenericMergeConfig.GENERIC_MERGE_REPORT_FILES_EQUIVALENT)
        reportFile.append(equivalenceRows)
        reportFile.append("\n")

        LOG.trace("Finished execution of Merge Tools Comparator on project ${project.getName()} and merge commit ${mergeCommit.getSHA()}")
    }

    /**
     * Builds the report row for a single scenario.
     *
     * @param project project written into the row
     * @param mergeCommit merge commit written into the row
     * @param scenario directory holding the two files to compare
     * @return project name, merge commit SHA, scenario path and the comparison result as text
     */
    private static List<String> checkIfOutputsAreEquivalent(Project project, MergeCommit mergeCommit, Path scenario) {
        LOG.trace("Starting to check if output for ${scenario.toString()} are equivalents")

        def row = [project.getName(),
                   mergeCommit.getSHA(),
                   scenario.toString(),
                   FileSyntacticDiff.areFilesSyntacticallyEquivalent(
                           scenario.resolve("merge.generic_merge.java"),
                           scenario.resolve("merge.jdime.java")).toString()]

        LOG.trace("Finished checking if output for ${scenario.toString()} are equivalents")
        return row
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package services.dataCollectors.GenericMerge

import interfaces.DataCollector
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger
import org.json.JSONObject
import project.MergeCommit
import project.Project
import services.dataCollectors.S3MMergesCollector.MergeScenarioCollector
import services.mergeScenariosFilters.NonFastForwardMergeScenarioFilter
import util.CsvUtils
import util.ProcessRunner

import java.nio.file.Path

/**
 * Counts the number of LOCs (ignoring comments and blank lines) in base, left and right.
 * It assumes that there's an executable for cloc in ./dependencies (symbolic links are allowed).
 *
 * For every non-fast-forward merge scenario one CSV row is appended to
 * {@code GenericMergeConfig.GENERIC_MERGE_REPORT_SCENARIO_LOCS_FILE_NAME} holding the project
 * name, the merge commit SHA, the scenario path, the three individual counts and their sum.
 */
class ScenarioLOCsCounter implements DataCollector {
    private static Logger LOG = LogManager.getLogger(ScenarioLOCsCounter.class)

    /**
     * Counts the lines of code of {@code base.java}, {@code left.java} and {@code right.java}
     * in each scenario of the merge commit and appends the rows to the report file.
     *
     * @param project project that owns the merge commit
     * @param mergeCommit merge commit whose scenarios are measured
     */
    @Override
    void collectData(Project project, MergeCommit mergeCommit) {
        def scenarios = MergeScenarioCollector.collectMergeScenarios(project, mergeCommit)
                .parallelStream()
                .filter(NonFastForwardMergeScenarioFilter::isNonFastForwardMergeScenario)
                .map((scenario) -> {
                    LOG.trace("Starting to count LOCs in ${scenario.toString()}")
                    def base = countLinesOfCodeInFile(scenario.resolve("base.java"))
                    def left = countLinesOfCodeInFile(scenario.resolve("left.java"))
                    def right = countLinesOfCodeInFile(scenario.resolve("right.java"))
                    def total = base + left + right

                    return [project.getName(), mergeCommit.getSHA(), scenario.toString(), base.toString(), left.toString(), right.toString(), total.toString()]
                })
                .map(CsvUtils::toCsvRepresentation)

        def reportFile = new File(GenericMergeConfig.GENERIC_MERGE_REPORT_SCENARIO_LOCS_FILE_NAME)
        reportFile << scenarios.collect(CsvUtils.asLines()) << "\n"
    }

    /**
     * Runs {@code ./cloc <file> --json} and returns the {@code code} field of the {@code SUM}
     * object found in its JSON output.
     *
     * NOTE(review): the first argument of {@code ProcessRunner.buildProcess} is presumably the
     * working directory of the process; confirm against ProcessRunner.
     *
     * @param file file whose lines of code are counted
     * @return the value reported by cloc under {@code SUM.code}
     */
    private static int countLinesOfCodeInFile(Path file) {
        def clocProcessBuilder = ProcessRunner.buildProcess("./dependencies",
                "./cloc",
                file.toAbsolutePath().toString(),
                "--json")

        def clocProcess = ProcessRunner.startProcess(clocProcessBuilder)

        // Drain stdout before waiting for termination: a child process that fills its output
        // pipe while nobody reads from it blocks, and waitFor() would then never return.
        def rawReport = clocProcess.getInputStream().readLines().join('\n')
        clocProcess.waitFor()

        def jsonOutput = new JSONObject(rawReport)
        return jsonOutput.getJSONObject("SUM").getInt("code")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package services.dataCollectors.GenericMerge

import interfaces.DataCollector
import project.MergeCommit
import project.Project
import services.dataCollectors.S3MMergesCollector.MergeScenarioCollector
import services.mergeScenariosFilters.NonFastForwardMergeScenarioFilter
import util.CsvUtils
import util.ProcessRunner

import java.nio.file.Path

/**
 * Data collector that measures how long {@code git merge-file} takes to merge the
 * {@code left.java}, {@code base.java} and {@code right.java} files of each non-fast-forward
 * merge scenario.
 *
 * One CSV row per scenario (project name, merge commit SHA, scenario path, average execution
 * time in nanoseconds) is appended to
 * {@code GenericMergeConfig.GENERIC_MERGE_REPORT_UNSTRUCTURED_TIMES_FILE_NAME}.
 */
class UnstructuredMergeCollector implements DataCollector {
    /**
     * Times the merge of every scenario of the merge commit and appends the rows to the
     * report file.
     *
     * @param project project that owns the merge commit
     * @param mergeCommit merge commit whose scenarios are measured
     */
    @Override
    void collectData(Project project, MergeCommit mergeCommit) {
        def scenarios = MergeScenarioCollector.collectMergeScenarios(project, mergeCommit)
                .stream()
                .filter(NonFastForwardMergeScenarioFilter::isNonFastForwardMergeScenario)
                .map(scenario -> {
                    def executionTime = runGitMergeFile(scenario)
                    return [project.getName(), mergeCommit.getSHA(), scenario.toString(), executionTime]
                })
                .map(CsvUtils::toCsvRepresentation)

        def reportFile = new File(GenericMergeConfig.GENERIC_MERGE_REPORT_UNSTRUCTURED_TIMES_FILE_NAME)
        reportFile << scenarios.collect(CsvUtils.asLines()) << "\n"
    }

    /**
     * Runs {@code git merge-file -p left.java base.java right.java}
     * {@code GenericMergeConfig.NUMBER_OF_EXECUTIONS} times and averages the wall-clock time.
     *
     * The {@code -p} flag makes git print the merge result to standard output. Without it git
     * writes the result into the first file, which would overwrite the scenario's
     * {@code left.java} and make every later execution merge an already merged input.
     *
     * @param scenario directory holding the three files to merge
     * @return average execution time in nanoseconds, truncated to a long
     */
    private static long runGitMergeFile(Path scenario) {
        def executionTimes = new ArrayList<Long>()

        for (int i = 0; i < GenericMergeConfig.NUMBER_OF_EXECUTIONS; i++) {
            long startTime = System.nanoTime()

            def processBuilder = ProcessRunner.buildProcess(GenericMergeConfig.BASE_EXPERIMENT_PATH,
                    "git",
                    "merge-file",
                    "-p",
                    scenario.resolve("left.java").toString(),
                    scenario.resolve("base.java").toString(),
                    scenario.resolve("right.java").toString())
            // The merge result now goes to stdout, so it has to be drained; otherwise a large
            // result could fill the pipe and stall the process. The data itself is discarded.
            // NOTE(review): assumes ProcessRunner.startProcess returns a java.lang.Process.
            ProcessRunner.startProcess(processBuilder).waitForProcessOutput()

            long endTime = System.nanoTime()
            // If we're running more than one execution, we use the first one as a warm up
            if (GenericMergeConfig.NUMBER_OF_EXECUTIONS == 1 || i > 0) {
                executionTimes.add(endTime - startTime)
            }
        }

        return (long) (executionTimes.stream().reduce(0, (prev, cur) -> prev + cur) / executionTimes.size())
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class NonFastForwardMergeScenarioFilter {
return mergeScenarios.stream().filter(this::isNonFastForwardMergeScenario).collect(Collectors.toList())
}

private static boolean isNonFastForwardMergeScenario(Path mergeScenario) {
public static boolean isNonFastForwardMergeScenario(Path mergeScenario) {
Path leftFile = getInvolvedFile(mergeScenario, 'left')
Path baseFile = getInvolvedFile(mergeScenario, 'base')
Path rightFile = getInvolvedFile(mergeScenario, 'right')
Expand Down
4 changes: 2 additions & 2 deletions src/main/services/util/MergeConflict.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ class MergeConflict {
Right
}

public static MINE_CONFLICT_MARKER = "<<<<<<<MINE"
public static MINE_CONFLICT_MARKER = "<<<<<<<"
public static BASE_CONFLICT_MARKER = "|||||||"
public static YOURS_CONFLICT_MARKER = ">>>>>>>YOURS"
public static YOURS_CONFLICT_MARKER = ">>>>>>>"
public static CHANGE_CONFLICT_MARKER = "======="
public static SIMPLE_CONFLICT_MARKER = "<<<<<<<"

Expand Down
23 changes: 23 additions & 0 deletions src/main/util/CsvUtils.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package util

import java.util.stream.Collectors

/**
 * Small helpers for building CSV report content.
 */
class CsvUtils {
    /**
     * Builds one CSV row out of the given items.
     *
     * The items are joined with commas and every backslash in the resulting row is turned
     * into a forward slash. No quoting or escaping of the items is performed.
     *
     * @param items values of the row, in column order
     * @return the row as a single comma separated line
     */
    static String toCsvRepresentation(List<String> items) {
        def row = items.join(',')
        // Literal replacement of each backslash by a forward slash.
        return row.replace('\\', '/')
    }

    /**
     * Provides a collector that concatenates the strings of a stream, placing the platform
     * line separator between consecutive elements.
     *
     * @return a joining collector using {@code System.lineSeparator()} as delimiter
     */
    static asLines() {
        def separator = System.lineSeparator()
        return Collectors.joining(separator)
    }
}

0 comments on commit 1e00670

Please sign in to comment.