Commit

Refactor code to use Gradle, Log4j, Dagger, and Lombok, and fix a bug with writing to *.csv and *.tsv

LTimothy committed Feb 19, 2023
1 parent df088ad commit 9adc19a
Showing 4 changed files with 514 additions and 49 deletions.
42 changes: 20 additions & 22 deletions README.md
@@ -1,30 +1,28 @@
# Double Entry Validation Tool

### Background
A tool which, given Qualtrics survey exports, can compare different survey entries (for the same participant data) and
efficiently identify mismatched points. This supports "double entry" workflows: when the survey administrator
enters data on Qualtrics twice for error-checking purposes, the administrator can verify whether or not
every point of data in one record corresponds to the matching point in the other record. This is useful for
identifying user input errors.

### Features
* Takes in a (.tsv) Qualtrics export file, compares double-entry rows, and identifies mismatched data.
* Exports mismatched data to a (.tsv) save file in a simplified format or a full-file format compatible with Qualtrics.
* Basic GUI (user interface).
This tool takes in a *.tsv or *.csv export from Qualtrics, and compares rows of data based on a unique participant ID
column, a column identifier to begin comparisons at, and a "double entry" participant ID prefix. Through the GUI, users
can efficiently analyze data reports from Qualtrics to identify discrepancies between the first entry of a row of data
and the "double entry". This is useful for identifying and reconciling user input errors.
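For illustration, a minimal export might look like the following (the column names, values, and `X_` prefix here are
hypothetical):

```
participant_id,q1,q2
101,5,3
X_101,5,4
```

Given the `X_` prefix and a comparison starting at `q1`, the tool would pair `101` with `X_101` and flag `q2` as a
mismatch (3 vs. 4).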

### Instructions
* Download the latest release for your platform. Instructions can be found in the app.
* Sample data is included in this distribution. The ID column is *31* and the prefix for double-entry data is *X_*.
### Features
* Takes in a *.tsv or *.csv Qualtrics export file, compares double-entry rows, and identifies mismatched data.
* Exports mismatched data to a file compatible with Qualtrics for import.
* A basic user interface is provided for broad usability.

### To-Do
* More Abstraction
* GUI update
* Documentation and comments
* Improved analysis algorithm
* Advanced Options (e.g. prefix/postfix combination, delimiter choice, remove double-entry rows etc.)
* Logic selection (e.g. Qualtrics, REDCap, etc.)
* Additional file support (automatically save and open different file extensions)
* Allow on-the-fly data correction
* Improved instructions
* Parallelism and multithreading support
### Execution
```
./gradlew build
./gradlew shadowJar
```
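Once built, the application can be started from the shadow jar (the Shadow plugin appends the `-all` classifier by
default; substitute the actual project name and version of your build for the placeholder below):

```
java -jar build/libs/<project-name>-1.0-SNAPSHOT-all.jar
```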

### Additional Notes
Work in progress. Created for the Family and Culture Lab at Berkeley, CA as a side-project.

Updated 4/24/20
Originally developed in April 2020. Refactored in February 2023. Created for the Family and Culture Lab at
Berkeley, CA as a side-project.
15 changes: 15 additions & 0 deletions build.gradle
@@ -2,11 +2,15 @@ plugins {
id 'java'
id 'idea'
id "io.freefair.lombok" version "6.6.2"
id 'application'
id 'com.github.johnrengelman.shadow' version '7.1.0'
}

group 'org.ltimothy.fclab'
version '1.0-SNAPSHOT'

mainClassName = 'org.ltimothy.fclab.DoubleEntry'

repositories {
mavenCentral()
}
@@ -19,12 +23,23 @@ dependencies {
implementation 'org.apache.logging.log4j:log4j-slf4j-impl:2.19.0'
implementation 'com.google.dagger:dagger:2.45'
annotationProcessor 'com.google.dagger:dagger-compiler:2.45'
implementation 'com.opencsv:opencsv:5.7.1'
}

test {
useJUnitPlatform()
}

jar {
manifest {
attributes 'Main-Class': mainClassName
}
}

shadowJar {
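    // Merge META-INF/services descriptors from all dependencies so ServiceLoader-based
    // lookups keep working inside the fat jar.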
mergeServiceFiles()
}

tasks.withType(JavaCompile) {
options.compilerArgs += ["-Adagger.formatGeneratedSource=disabled"]
}
173 changes: 173 additions & 0 deletions src/main/java/org/ltimothy/fclab/data/QualtricsSurvey.java
@@ -0,0 +1,173 @@
package org.ltimothy.fclab.data;

import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvException;
import com.opencsv.exceptions.CsvValidationException;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.ltimothy.fclab.gui.DefaultGUI;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

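/**
 * In-memory representation of a Qualtrics survey export (*.tsv or *.csv).
 *
 * <p>Rows are indexed by participant ID; each "double entry" row (an ID beginning with the
 * configured prefix) is compared column-by-column against its original entry starting at
 * {@code firstRelevantColumn}, and any mismatches are reported through {@link DefaultGUI}.
 */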
@Slf4j
public class QualtricsSurvey {
private static final int QUALTRICS_PRIMARY_HEADER_INDEX = 0;
private static final int QUALTRICS_HEADERS_TOTAL_LENGTH = 3;
private static final String NO_MISMATCH_TEXT = "OK";

private final int participantIdColumn;
private final int firstRelevantColumn;
private final String doubleEntryIdPrefix;
private final Map<String, String[]> participantIdToRawData;

private List<String[]> rawDataHeaders;
private List<String[]> processedData;

public QualtricsSurvey(@NonNull final File file, int participantIdColumn, int firstRelevantColumn,
@NonNull final String doubleEntryIdPrefix) {
this.participantIdToRawData = new HashMap<>();
this.rawDataHeaders = new ArrayList<>();
this.processedData = new ArrayList<>();
this.participantIdColumn = participantIdColumn;
this.firstRelevantColumn = firstRelevantColumn;
this.doubleEntryIdPrefix = doubleEntryIdPrefix.toLowerCase();
processFile(file);
}

public List<String[]> getExportData() {
final List<String[]> exportData = new ArrayList<>();
exportData.addAll(rawDataHeaders);
exportData.addAll(processedData);
return exportData;
}

private void processFile(@NonNull File file) throws IllegalStateException {
loadFile(file);

try {
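            // Walk every loaded participant: original entries are paired with their prefixed
            // double entry (if present) and compared column-by-column from firstRelevantColumn
            // onward; matching cells in the double-entry copy are rewritten as NO_MISMATCH_TEXT.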
for (final String participantId : participantIdToRawData.keySet()) {
final String doubleEntryParticipantId = doubleEntryIdPrefix.concat(participantId);
                // Strip the prefix only when it is actually present; calling substring on an ID
                // shorter than the prefix would otherwise throw.
                final String originalEntryParticipantId = participantId.startsWith(doubleEntryIdPrefix)
                        ? participantId.substring(doubleEntryIdPrefix.length()) : participantId;
if (!participantId.startsWith(doubleEntryIdPrefix)) {
if (participantIdToRawData.containsKey(doubleEntryParticipantId)){
final String[] originalEntryRawData = participantIdToRawData.get(participantId);
final String[] doubleEntryData =
Arrays.copyOf(participantIdToRawData.get(doubleEntryParticipantId),
originalEntryRawData.length);

printAnalysisHeader(participantId);
for (int i = firstRelevantColumn; i < originalEntryRawData.length; i++) {
final String originalValue = originalEntryRawData[i];
final String doubleEntryValue = doubleEntryData[i];
if (!originalValue.trim().equalsIgnoreCase(doubleEntryValue.trim())) {
printMismatch(rawDataHeaders.get(QUALTRICS_PRIMARY_HEADER_INDEX)[i], originalValue,
doubleEntryValue);
} else {
doubleEntryData[i] = NO_MISMATCH_TEXT;
}
}

processedData.add(originalEntryRawData);
processedData.add(doubleEntryData);
} else {
processedData.add(participantIdToRawData.get(participantId));
}
} else if (participantId.startsWith(doubleEntryIdPrefix) &&
!participantIdToRawData.containsKey(originalEntryParticipantId)){
printAnalysisHeader(participantId);
log.warn("There was no entry {} for double-entry {}", originalEntryParticipantId, participantId);
DefaultGUI.appendStatusTextArea("[Warning] There was no entry " +
originalEntryParticipantId + " for double-entry " + participantId);
processedData.add(participantIdToRawData.get(participantId));
}
}
} catch (final IndexOutOfBoundsException e) {
log.error("Column identifiers are invalid!");
throw new IllegalStateException("Column identifiers are invalid!");
}
}

    private void printAnalysisHeader(@NonNull final String participantId) {
        // Center the participant ID inside a fixed-width box; padding is computed against the inner
        // width so the framed line is exactly as wide as the borders for both odd- and even-length IDs.
        final int boxWidth = 50;
        final int innerWidth = boxWidth - 2;
        final int leftPadding = Math.max(0, (innerWidth - participantId.length()) / 2);
        final int rightPadding = Math.max(0, innerWidth - participantId.length() - leftPadding);

        DefaultGUI.appendStatusTextArea("+" + "-".repeat(innerWidth) + "+");
        DefaultGUI.appendStatusTextArea("|" + " ".repeat(innerWidth) + "|");
        DefaultGUI.appendStatusTextArea("|" + " ".repeat(leftPadding) + participantId + " ".repeat(rightPadding) + "|");
        DefaultGUI.appendStatusTextArea("|" + " ".repeat(innerWidth) + "|");
        DefaultGUI.appendStatusTextArea("+" + "-".repeat(innerWidth) + "+");
    }

private void printMismatch(@NonNull final String columnHeader, @NonNull final String originalValue,
@NonNull final String doubleEntryValue) {
DefaultGUI.appendStatusTextArea("[Mismatch] " + columnHeader + " originally was \"" +
originalValue.trim() + "\" but was \"" + doubleEntryValue.trim() + "\" in the double entry.");
}

private void loadFile(@NonNull File file) throws IllegalStateException {
final String filePath = file.getPath();
final String fileExtension = filePath.substring(filePath.lastIndexOf(".") + 1).toLowerCase();
int headersRemaining = QUALTRICS_HEADERS_TOTAL_LENGTH;
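        // Qualtrics exports begin with QUALTRICS_HEADERS_TOTAL_LENGTH header rows; they are kept
        // separately so getExportData() can emit them ahead of the processed rows.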

if (fileExtension.equals("tsv")) {
try (final BufferedReader br = new BufferedReader(new FileReader(filePath))) {
String line;
while ((line = br.readLine()) != null) {
final String[] fields = line.split("\t", -1);
if (headersRemaining > 0) {
rawDataHeaders.add(fields);
headersRemaining--;
continue;
}
processNonHeaderLine(fields);
}
} catch (final IOException | IndexOutOfBoundsException e) {
log.error("Exception in processing the *.tsv file {}", file, e);
}
} else if (fileExtension.equals("csv")) {
try (final CSVReader reader = new CSVReader(new FileReader(filePath))) {
String[] nextLine;
while ((nextLine = reader.readNext()) != null) {
if (headersRemaining > 0) {
rawDataHeaders.add(nextLine);
headersRemaining--;
continue;
}
processNonHeaderLine(nextLine);
}
} catch (final IOException | CsvValidationException | IndexOutOfBoundsException e) {
log.error("Exception in processing the *.csv file {}", file, e);
}
} else {
log.info("The file selected was of an unsupported file type {}", file);
}
}

private void processNonHeaderLine(@NonNull String[] fields) throws IndexOutOfBoundsException,
IllegalStateException {
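        // Index each row by its lower-cased participant ID; duplicate IDs abort processing and
        // rows with a blank ID are dropped from the export.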
final String participantId = fields[participantIdColumn];
final String participantIdLower = fields[participantIdColumn].toLowerCase();
if (participantIdToRawData.containsKey(participantIdLower)) {
log.error("Participant ID must be unique! At least one duplicate with ID {} was found.", participantId);
            throw new IllegalStateException("Participant ID must be unique! At least one duplicate with ID " +
                    participantId + " was found.");
} else if (!participantIdLower.isBlank()){
participantIdToRawData.put(participantIdLower, fields);
} else {
log.info("Blank participant ID was removed for row with fields {}", Arrays.toString(fields));
DefaultGUI.appendStatusTextArea("Rows with blank participant ids were removed!");
}
}
}