thehyve · EIjo · May 22, 2024 · May 23, 2024 · Jul 17, 2024 · Jul 17, 2024
diff --git a/README.md b/README.md
@@ -37,6 +37,11 @@ Technology
 White Rabbit and Rabbit in a Hat are pure Java applications. Both applications use [Apache's POI Java libraries](http://poi.apache.org/) to read and write Word and Excel files. 
 White Rabbit uses JDBC to connect to the respective databases.
 
+Intended use
+============
+White Rabbit and Rabbit in a Hat were designed and implemented for use within a secure and trusted environment. No efforts have been made to
+encrypt or otherwise protect the passwords, parameters and results. This should be kept in mind when deploying these tools.
+
 System Requirements
 ============
 Requires Java 1.8 or higher for running, and read access to the database to be scanned. Java can be downloaded from

diff --git a/rabbit-core/pom.xml b/rabbit-core/pom.xml
@@ -145,7 +145,7 @@
         <dependency>
             <groupId>com.amazon.redshift</groupId>
             <artifactId>redshift-jdbc42</artifactId>
-            <version>2.1.0.25</version>
+            <version>2.1.0.28</version>
         </dependency>
         <!-- https://mvnrepository.com/artifact/org.apache.avro/avro -->
         <dependency>

diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/files/ReadCsvFile.java b/rabbit-core/src/main/java/org/ohdsi/utilities/files/ReadCsvFile.java
@@ -0,0 +1,155 @@
+/*******************************************************************************
+ * Copyright 2019 Observational Health Data Sciences and Informatics
+ * 
+ * This file is part of WhiteRabbit
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.utilities.files;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Line-oriented reader for a text (CSV) file that can either hand out lines in
+ * batches ({@link #loadFromFileInBatches(Integer)}) or iterate over a sample of
+ * the file that is spread over several segments ("splits") instead of being
+ * taken only from the head of the file.
+ *
+ * <p>While iterating, after every {@code sampleSize / splits} lines the reader
+ * jumps forward to the estimated start of the next segment. The jump distance
+ * is derived from the file size and the average line length observed so far,
+ * so line positions are estimates, not exact values.
+ *
+ * <p>All iterators created by one instance share the same underlying reader
+ * and the same counters.
+ */
+public class ReadCsvFile extends ReadTextFile {
+  /** Path of the file being read, as passed to the constructor. */
+  public String filename;
+  /** UTF-8 reader over {@link #filename}; stays {@code null} if the file could not be opened. */
+  protected BufferedReader bufferedReader;
+  /** Set to {@code true} once the end of the file (or the sample limit) is reached. */
+  public boolean EOF = false;
+  /** Running total of (line length + 2) over all lines returned by the iterator. */
+  public long charCount;
+  /** Size of the file in bytes as reported by {@link Files#size}; 0 if it could not be determined. */
+  public long fileSize;
+  /** Number of segments the sample is spread over. */
+  public int splits;
+  /** Index of the segment currently being read (starts at 0). */
+  public int currentSplit;
+  /** Number of lines returned by the iterator so far. */
+  public long linesRead;
+  /** Total number of lines the caller intends to sample. */
+  public int sampleSize;
+  /** Total number of characters skipped while jumping between segments. */
+  public int charSkipped;
+
+  /**
+   * Opens {@code filename} for reading as UTF-8 and records its size.
+   *
+   * <p>Failures to open the file or to determine its size are reported on
+   * standard error and otherwise ignored (best effort, as before); in that case
+   * {@link #bufferedReader} is {@code null} and/or {@link #fileSize} is 0.
+   *
+   * @param filename   path of the file to read
+   * @param sampleSize number of lines the caller intends to sample
+   * @param splits     number of segments to spread the sample over
+   */
+  public ReadCsvFile(String filename, int sampleSize, int splits) {
+    super(filename);
+    this.filename = filename;
+    try {
+      FileInputStream inputStream = new FileInputStream(filename);
+      // Passing a Charset (not a charset name) cannot fail with UnsupportedEncodingException.
+      bufferedReader = new BufferedReader(
+          new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8));
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    }
+
+    this.sampleSize = sampleSize;
+    this.currentSplit = 0;
+    this.linesRead = 0;
+    this.charCount = 0;
+    this.charSkipped = 0;
+    this.splits = splits;
+
+    try {
+      this.fileSize = Files.size(Paths.get(filename));
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+
+  /**
+   * Convenience alias for {@link #iterator()}.
+   *
+   * @return a new iterator over the sampled lines
+   */
+  public Iterator<String> getIterator() {
+    return iterator();
+  }
+
+  /**
+   * Reads up to {@code batchsize} consecutive lines from the current reader position.
+   * When the end of the file is reached, {@link #EOF} is set and the reader is closed.
+   *
+   * @param batchsize maximum number of lines to return
+   * @return the lines read; empty when the end of the file was already reached
+   */
+  public List<String> loadFromFileInBatches(Integer batchsize) {
+    List<String> result = new ArrayList<>();
+    if (!EOF) {
+      try {
+        int i = 0;
+        while (!EOF && i++ < batchsize) {
+          String nextLine = bufferedReader.readLine();
+          if (nextLine == null) {
+            EOF = true;
+          } else {
+            result.add(nextLine);
+          }
+        }
+        if (EOF) {
+          bufferedReader.close();
+        }
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Iterator that returns lines one at a time and, at segment boundaries, skips
+   * ahead in the file. It always holds the next line to return in {@code buffer}.
+   */
+  private class CsvFileIterator implements Iterator<String> {
+    /** The line that the next call to {@link #next()} will return; {@code null} at end of file. */
+    private String buffer;
+
+    /** Pre-loads the first line; marks EOF when there is none or the sample limit is exceeded. */
+    public CsvFileIterator() {
+      try {
+        buffer = bufferedReader.readLine();
+        if (buffer == null || linesRead > sampleSize) {
+          EOF = true;
+          bufferedReader.close();
+        }
+      }
+      catch (IOException e) {
+        e.printStackTrace();
+      }
+
+    }
+
+    @Override
+    public boolean hasNext() {
+      return !EOF;
+    }
+
+    /**
+     * Returns the buffered line and pre-loads the following one, jumping to the
+     * estimated start of the next segment when a segment boundary is reached.
+     *
+     * @return the next sampled line
+     * @throws java.util.NoSuchElementException if there is no buffered line left
+     */
+    @Override
+    public String next() {
+      String result = buffer;
+      if (result == null) {
+        // Previously this surfaced as a NullPointerException further down.
+        throw new java.util.NoSuchElementException("No more lines in " + filename);
+      }
+      try {
+        // Guard against splits == 0 and sampleSize < splits, both of which would
+        // otherwise cause a division (modulo) by zero.
+        int linesPerSplit = splits > 0 ? sampleSize / splits : 0;
+        if (linesPerSplit > 0 && linesRead > 0 && linesRead % linesPerSplit == 0) {
+          currentSplit++;
+
+          int nextStartLine = (getLineCountEstimate() / splits) * currentSplit;
+
+          // linesRead > 0 here, so charCount > 0 and both divisions below are safe.
+          int expectedLinesSkipped = (int) ((charSkipped * linesRead) / charCount);
+          long charsToSkip = (nextStartLine - (linesRead + expectedLinesSkipped)) * (charCount / linesRead);
+
+          // BufferedReader.skip rejects negative values; when the estimate says we
+          // are already at or past the segment start, simply keep reading.
+          if (charsToSkip > 0) {
+            charSkipped += (int) bufferedReader.skip(charsToSkip);
+            // The skip most likely landed mid-line: discard the partial line.
+            bufferedReader.readLine();
+          }
+        }
+        buffer = bufferedReader.readLine();
+        if (buffer == null) {
+          EOF = true;
+          bufferedReader.close();
+        }
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+      // length + 2 as an allowance for the line terminator characters
+      charCount += result.length() + 2;
+      linesRead++;
+      return result;
+    }
+
+    @Override
+    public void remove() {
+      // not implemented
+    }
+
+  }
+
+  /**
+   * Creates a new iterator over the sampled lines. The iterator reads from the
+   * shared reader, starting at its current position.
+   *
+   * @return a new iterator
+   */
+  public Iterator<String> iterator() {
+    return new CsvFileIterator();
+  }
+
+  /**
+   * Estimates the total number of lines in the file by scaling the file size
+   * with the average line length observed so far.
+   *
+   * @return the estimated line count, or 0 when no line has been consumed yet
+   */
+  public int getLineCountEstimate() {
+    if (charCount == 0) {
+      // Nothing read yet: no basis for an estimate (and avoids division by zero).
+      return 0;
+    }
+    return (int) ((fileSize * linesRead) / charCount);
+  }
+}