-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bumps fluxtion to version 9.2.23 data ingestion example
- Loading branch information
1 parent
ac4bd3d
commit 2e2b288
Showing
18 changed files
with
3,383 additions
and
156 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
75 changes: 74 additions & 1 deletion
75
cookbook/src/main/java/com/fluxtion/example/cookbook/dataingestion/Main.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,96 @@ | ||
package com.fluxtion.example.cookbook.dataingestion; | ||
|
||
import com.fluxtion.example.cookbook.dataingestion.api.DataIngestComponent; | ||
import com.fluxtion.example.cookbook.dataingestion.api.DataIngestConfig; | ||
import com.fluxtion.example.cookbook.dataingestion.api.HouseRecord; | ||
import com.fluxtion.example.cookbook.dataingestion.pipeline.DataIngestionPipeline; | ||
|
||
import java.io.BufferedOutputStream; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* Executes a {@link DataIngestionPipeline} with data from Kaggle's AmesHousing.csv data file. The pipeline behaviour is: | ||
* <ul> | ||
* <li>Subscribes to String events</li> | ||
* <li>Tries to marshal the String from csv into a {@link HouseRecord} </li> | ||
* <li>Transforms the {@link HouseRecord} by applying a user transform function</li> | ||
* <li>Validates the transformed {@link HouseRecord} with a user supplied {@link java.util.function.Predicate}</li> | ||
* <li>Writes the valid {@link HouseRecord} to a user supplied {@link java.io.Writer} as CSV</li> | ||
* <li>Writes the valid {@link HouseRecord} to a user supplied {@link java.io.OutputStream} in a binary format</li> | ||
* <li>Processing stats are updated with each valid transformed {@link HouseRecord}</li> | ||
* </ul> | ||
* | ||
* Any processing errors are recorded as: | ||
* <ul> | ||
* <li>An entry in the invalid log that writes to a user supplied {@link java.io.Writer}</li> | ||
* <li>Processing stats are updated with each csv error</li> | ||
* <li>Processing stats are updated with each {@link HouseRecord} validation failure</li> | ||
* </ul> | ||
* | ||
* Dynamic configuration is supplied in an instance of {@link DataIngestConfig} for: | ||
* <ul> | ||
* <li>{@link HouseRecord} validation {@link java.util.function.Predicate}</li> | ||
* <li>{@link HouseRecord} transformer as {@link java.util.function.UnaryOperator}</li> | ||
* <li>Post process Csv output - {@link java.io.Writer}</li> | ||
* <li>Post process binary output - {@link java.io.OutputStream}</li> | ||
* <li>Statistics output - {@link java.io.Writer}</li> | ||
* <li>Invalid log output - {@link java.io.Writer}</li> | ||
* </ul> | ||
* | ||
* | ||
*/ | ||
public class Main { | ||
|
||
public static void main(String[] args) throws IOException { | ||
//set up pipeline | ||
var dataIngest = new DataIngestionPipeline(); | ||
//lifecycle call to init pipeline, user components that implement DataIngestLifecycle receive init callback | ||
dataIngest.init(); | ||
|
||
try (Stream<String> reader = Files.lines(Path.of("data/ml/linear_regression/AmesHousing.csv"))) { | ||
//get the exported DataIngestComponent service, used to set configuration as an api call | ||
DataIngestComponent dataIngestComponent = dataIngest.getExportedService(); | ||
|
||
//set up a config for pipeline - can be changed dynamically during the run | ||
Path dataPath = Path.of("data/dataingest/"); | ||
Path dataOutPath = Path.of("data/dataingest/output/"); | ||
Files.createDirectories(dataOutPath); | ||
DataIngestConfig dataIngestConfig = DataIngestConfig.builder() | ||
.houseRecordValidator(houseRecord -> houseRecord.MS_Zoning().equalsIgnoreCase("FV")) | ||
.houseTransformer(Main::tansformInputHouseRecord) | ||
.csvWriter(Files.newBufferedWriter(dataOutPath.resolve("postProcessHouse.csv"))) | ||
.binaryWriter(new BufferedOutputStream(Files.newOutputStream(dataOutPath.resolve("postProcessHouse.binary")))) | ||
.statsWriter(Files.newBufferedWriter(dataOutPath.resolve("processStats.rpt"))) | ||
.invalidLogWriter(Files.newBufferedWriter(dataOutPath.resolve("processingErrors.log"))) | ||
.build(); | ||
|
||
//update the config for the pipeline | ||
dataIngestComponent.configUpdate(dataIngestConfig); | ||
|
||
//send some data as individual events | ||
try (Stream<String> reader = Files.lines(dataPath.resolve("input/AmesHousing.csv"))) { | ||
reader.forEach(dataIngest::onEvent); | ||
} | ||
|
||
//lifecycle call to close pipeline, user components that implement DataIngestLifecycle receive tearDown callback | ||
dataIngest.tearDown(); | ||
} | ||
|
||
//User supplied function to transform a HouseRecord for post process output | ||
public static HouseRecord tansformInputHouseRecord(HouseRecord houseRecord) { | ||
int lotFrontage = houseRecord.Lot_Frontage(); | ||
houseRecord.Lot_Frontage_Squared(lotFrontage * lotFrontage); | ||
|
||
switch (houseRecord.MS_Zoning()) { | ||
case "A" -> houseRecord.ms_zone_category(1); | ||
case "FV" -> houseRecord.ms_zone_category(2); | ||
case "RL" -> houseRecord.ms_zone_category(3); | ||
case "RM" -> houseRecord.ms_zone_category(4); | ||
default -> houseRecord.ms_zone_category(-1); | ||
} | ||
return houseRecord; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 18 additions & 1 deletion
19
cookbook/src/main/java/com/fluxtion/example/cookbook/dataingestion/api/DataIngestConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,21 @@ | ||
package com.fluxtion.example.cookbook.dataingestion.api; | ||
|
||
public record DataIngestConfig(String name) { | ||
import lombok.Builder; | ||
import lombok.Data; | ||
|
||
import java.io.OutputStream; | ||
import java.io.Writer; | ||
import java.util.function.Predicate; | ||
import java.util.function.UnaryOperator; | ||
|
||
@Data | ||
@Builder | ||
public final class DataIngestConfig { | ||
private final Predicate<HouseRecord> houseRecordValidator; | ||
private final UnaryOperator<HouseRecord> houseTransformer; | ||
private final OutputStream binaryWriter; | ||
private final Writer csvWriter; | ||
private final Writer statsWriter; | ||
private final Writer invalidLogWriter; | ||
|
||
} |
12 changes: 12 additions & 0 deletions
12
cookbook/src/main/java/com/fluxtion/example/cookbook/dataingestion/api/DataIngestStats.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package com.fluxtion.example.cookbook.dataingestion.api; | ||
|
||
import java.util.function.Consumer; | ||
|
||
public interface DataIngestStats extends DataIngestComponent { | ||
|
||
void publishStats(); | ||
|
||
void currentStats(Consumer<String> consumer); | ||
|
||
void clearStats(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 0 additions & 25 deletions
25
...in/java/com/fluxtion/example/cookbook/dataingestion/function/HouseRecordBinaryWriter.java
This file was deleted.
Oops, something went wrong.
25 changes: 0 additions & 25 deletions
25
.../main/java/com/fluxtion/example/cookbook/dataingestion/function/HouseRecordCsvWriter.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 23 additions & 2 deletions
25
.../main/java/com/fluxtion/example/cookbook/dataingestion/function/HouseRecordValidator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.