Initial work

cgivre · Jun 25, 2024 · 4dc9f12 · 4dc9f12
1 parent 40500ec
commit 4dc9f12
Showing 1 changed file with 26 additions and 1 deletion.
diff --git a/.../src/main/java/org/apache/drill/exec/store/easy/text/reader/CompliantTextBatchReader.java b/.../src/main/java/org/apache/drill/exec/store/easy/text/reader/CompliantTextBatchReader.java
@@ -156,6 +156,14 @@ private FieldVarCharOutput buildWithSchema(FileSchemaNegotiator schemaNegotiator
     return new FieldVarCharOutput(receiver);
   }
 
+  /**
+   * Intended to build a schema from the first few rows of a CSV file. Not yet implemented: always returns {@code null}.
+   * @return A {@link TupleMetadata} of the schema of the data; currently {@code null} because this is still a stub.
+   */
+  private TupleMetadata buildSchemaFromFirstRows() {
+    return null;
+  }
+
   private TupleMetadata buildSchemaFromHeaders(String[] fieldNames) {
     // Build table schema from headers
     TupleMetadata readerSchema = new TupleSchema();
@@ -314,7 +322,7 @@ private String[] extractHeader(FileSchemaNegotiator schemaNegotiator) throws IOE
     final InputStream hStream = schemaNegotiator.file().open();
     final HeaderBuilder hOutput = new HeaderBuilder(split.getPath());
 
-    // we should read file header irrespective of split given given to this reader
+    // we should read file header irrespective of split given to this reader
     final TextInput hInput = new TextInput(settings, hStream, readBuffer, 0, split.getLength());
 
     final String [] fieldNames;
@@ -335,6 +343,23 @@ private String[] extractHeader(FileSchemaNegotiator schemaNegotiator) throws IOE
     return fieldNames;
   }
 
+  private void extractFirstRows(FileSchemaNegotiator schemaNegotiator, int rowCount) throws IOException {
+    assert settings.isHeaderExtractionEnabled();
+    // NOTE(review): work in progress - rowCount is not read anywhere in this method yet
+    // don't skip the header, even when skipFirstLine is set to true
+    settings.setSkipFirstLine(false);
+
+    // Get the header row; the names are stored in fieldNames but not used afterwards yet
+    String[] fieldNames = extractHeader(schemaNegotiator);
+    // turn first-line skipping back on (set unconditionally; any earlier value is not restored)
+    settings.setSkipFirstLine(true);
+    // clear both buffers once the header has been extracted
+    readBuffer.clear();
+    whitespaceBuffer.clear();
+
+  }
+
+
   /**
    * Generates the next record batch
    * @return number of records in the batch