Skip to content

Commit

Permalink
Add support for alternative charsets and update demo output
Browse files Browse the repository at this point in the history
  • Loading branch information
LTimothy committed Feb 19, 2023
1 parent 9adc19a commit 561576f
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 128 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ and the "double entry". This is useful for identifying and reconciling user inpu
./gradlew shadowJar
```

### Demo
Sample data is included in this distribution. The ID column is `AF`, the first relevant column is `C`, and the prefix
for double-entry data is `X_`.

### Future Improvements
* Integration tests and unit tests; this was rushed to fulfill an immediate need within the lab.
* Refactoring of code into more classes; the responsibilities of the classes are somewhat intermingled and can be improved.

### Additional Notes
Originally developed in April 2020. Refactored in February 2023. Created for the Family and Culture Lab at
Berkeley, CA, as a side project.
4 changes: 0 additions & 4 deletions SampleData/defaultDataOutput.tsv

This file was deleted.

83 changes: 0 additions & 83 deletions SampleData/fullDataOutput.tsv

This file was deleted.

Binary file added SampleData/output.csv
Binary file not shown.
Binary file added SampleData/output.tsv
Binary file not shown.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies {
implementation 'com.google.dagger:dagger:2.45'
annotationProcessor 'com.google.dagger:dagger-compiler:2.45'
implementation 'com.opencsv:opencsv:5.7.1'
implementation 'com.googlecode.juniversalchardet:juniversalchardet:1.0.3'
}

test {
Expand Down
64 changes: 33 additions & 31 deletions src/main/java/org/ltimothy/fclab/data/QualtricsSurvey.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package org.ltimothy.fclab.data;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.ICSVParser;
import com.opencsv.bean.CsvToBeanBuilder;
import com.opencsv.exceptions.CsvException;
import com.opencsv.exceptions.CsvValidationException;
import lombok.NonNull;
Expand All @@ -11,6 +15,8 @@
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
Expand All @@ -27,18 +33,20 @@ public class QualtricsSurvey {
private final int firstRelevantColumn;
private final String doubleEntryIdPrefix;
private final Map<String, String[]> participantIdToRawData;
private final Charset charset;

private List<String[]> rawDataHeaders;
private List<String[]> processedData;

/**
 * Creates a survey, initializes its empty data containers, and then calls {@code processFile(file)}.
 *
 * @param file                the survey export file handed to {@code processFile}
 * @param participantIdColumn index of the column holding the participant ID
 * @param firstRelevantColumn index of the first relevant column
 * @param doubleEntryIdPrefix prefix identifying double-entry participant IDs; stored lower-cased
 * @param charset             character set stored on the instance and passed to the {@code FileReader}
 *                            that reads the file
 */
public QualtricsSurvey(@NonNull final File file, int participantIdColumn, int firstRelevantColumn,
@NonNull final String doubleEntryIdPrefix) {
@NonNull final String doubleEntryIdPrefix, @NonNull Charset charset) {
this.participantIdToRawData = new HashMap<>();
this.rawDataHeaders = new ArrayList<>();
this.processedData = new ArrayList<>();
this.participantIdColumn = participantIdColumn;
this.firstRelevantColumn = firstRelevantColumn;
// The prefix is normalized to lower case once here.
this.doubleEntryIdPrefix = doubleEntryIdPrefix.toLowerCase();
this.charset = charset;
// All fields are assigned before processFile runs.
processFile(file);
}

Expand Down Expand Up @@ -119,39 +127,33 @@ private void loadFile(@NonNull File file) throws IllegalStateException {
final String fileExtension = filePath.substring(filePath.lastIndexOf(".") + 1).toLowerCase();
int headersRemaining = QUALTRICS_HEADERS_TOTAL_LENGTH;

final char delimiter;
if (fileExtension.equals("tsv")) {
try (final BufferedReader br = new BufferedReader(new FileReader(filePath))) {
String line;
while ((line = br.readLine()) != null) {
final String[] fields = line.split("\t", -1);
if (headersRemaining > 0) {
rawDataHeaders.add(fields);
headersRemaining--;
continue;
}
processNonHeaderLine(fields);
}
} catch (final IOException | IndexOutOfBoundsException e) {
log.error("Exception in processing the *.tsv file {}", file, e);
}
delimiter = '\t';
} else if (fileExtension.equals("csv")) {
int i = 0;
try (final CSVReader reader = new CSVReader(new FileReader(filePath))) {
String[] nextLine;
while ((nextLine = reader.readNext()) != null) {
i++;
if (headersRemaining > 0) {
rawDataHeaders.add(nextLine);
headersRemaining--;
continue;
}
processNonHeaderLine(nextLine);
}
} catch (final IOException | CsvValidationException | IndexOutOfBoundsException e) {
log.error("Exception in processing the *.csv file {}", file, e);
}
delimiter = ',';
} else {
log.info("The file selected was of an unsupported file type {}", file);
return;
}

try (final CSVReader reader = new CSVReaderBuilder(new FileReader(filePath, charset))
.withCSVParser(new CSVParserBuilder()
.withQuoteChar(ICSVParser.DEFAULT_QUOTE_CHARACTER)
.withSeparator(delimiter)
.build())
.build()) {
String[] nextLine;
while ((nextLine = reader.readNext()) != null) {
if (headersRemaining > 0) {
rawDataHeaders.add(nextLine);
headersRemaining--;
continue;
}
processNonHeaderLine(nextLine);
}
} catch (final IOException | CsvValidationException | IndexOutOfBoundsException e) {
log.error("Exception in processing the file {}", file, e);
}
}

Expand All @@ -167,7 +169,7 @@ private void processNonHeaderLine(@NonNull String[] fields) throws IndexOutOfBou
participantIdToRawData.put(participantIdLower, fields);
} else {
log.info("Blank participant ID was removed for row with fields {}", Arrays.toString(fields));
DefaultGUI.appendStatusTextArea("Rows with blank participant ids were removed!");
DefaultGUI.appendStatusTextArea("A row with a blank participant id was removed!");
}
}
}
Loading

0 comments on commit 561576f

Please sign in to comment.