forked from the-qa-company/qEndpoint
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b9c4d91
commit ec5a712
Showing
9 changed files
with
528 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,3 +58,6 @@ data | |
wikidata | ||
qendpoint-store/wdbench-indexes | ||
wdbench-results | ||
testing | ||
indexing | ||
wdbench-indexes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
...re/src/main/java/com/the_qa_company/qendpoint/core/rdf/parsers/ConcurrentInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
package com.the_qa_company.qendpoint.core.rdf.parsers; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.io.PipedInputStream; | ||
import java.io.PipedOutputStream; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
/**
 * Splits a line-oriented input stream into several independent streams that
 * can be consumed concurrently.
 * <p>
 * A background daemon thread reads the source line by line (decoded as UTF-8).
 * Every line containing the marker {@code "_:"} is written to a dedicated
 * stream (see {@link #getBnodeStream()}); every other line is written to
 * exactly one of the fan-out streams (see {@link #getStreams()}), chosen
 * round-robin. Each line is re-emitted terminated by a single {@code '\n'}.
 * <p>
 * If reading the source or writing to any pipe fails, all streams are closed
 * and every consumer receives an {@link IOException} once it has drained the
 * data that was already buffered, instead of a silent (truncated) end of
 * stream.
 * <p>
 * Writes to the pipes block when a pipe's buffer is full, so every returned
 * stream must be consumed (typically each on its own thread) for the reader to
 * make progress.
 */
public class ConcurrentInputStream {

    /** Default capacity, in bytes, of each pipe's buffer (128 MiB). */
    private static final int DEFAULT_PIPE_BUFFER_SIZE = 131072 * 1024;

    private final InputStream source;
    private final int numberOfStreams;

    private final PipedOutputStream[] pipedOutputStreams;
    private final InputStream[] pipedInputStreams;

    private final PipedOutputStream bnodeOutputStream;
    private final InputStream bnodeInputStream;

    private final Thread readerThread;

    /**
     * Set by the reader thread before it closes the pipes when it terminates
     * abnormally; {@code null} while everything is fine.
     */
    private volatile Throwable failure;

    /**
     * Creates the splitter with the default pipe buffer size and immediately
     * starts the background reader thread.
     *
     * @param stream          source to read lines from; closed by the reader
     *                        thread when it finishes
     * @param numberOfStreams number of round-robin fan-out streams, at least 1
     * @throws NullPointerException     if {@code stream} is {@code null}
     * @throws IllegalArgumentException if {@code numberOfStreams} is less than 1
     */
    public ConcurrentInputStream(InputStream stream, int numberOfStreams) {
        this(stream, numberOfStreams, DEFAULT_PIPE_BUFFER_SIZE);
    }

    /**
     * Creates the splitter with an explicit pipe buffer size and immediately
     * starts the background reader thread.
     *
     * @param stream          source to read lines from; closed by the reader
     *                        thread when it finishes
     * @param numberOfStreams number of round-robin fan-out streams, at least 1
     * @param pipeBufferSize  capacity in bytes of each pipe's buffer, at least
     *                        1; one such buffer is allocated per fan-out
     *                        stream plus one for the blank-node stream
     * @throws NullPointerException     if {@code stream} is {@code null}
     * @throws IllegalArgumentException if {@code numberOfStreams} or
     *                                  {@code pipeBufferSize} is less than 1
     */
    public ConcurrentInputStream(InputStream stream, int numberOfStreams, int pipeBufferSize) {
        if (stream == null) {
            throw new NullPointerException("stream");
        }
        if (numberOfStreams < 1) {
            throw new IllegalArgumentException("numberOfStreams must be >= 1 but was " + numberOfStreams);
        }
        if (pipeBufferSize < 1) {
            throw new IllegalArgumentException("pipeBufferSize must be >= 1 but was " + pipeBufferSize);
        }
        this.source = stream;
        this.numberOfStreams = numberOfStreams;

        // Set up main fan-out pipes
        this.pipedOutputStreams = new PipedOutputStream[numberOfStreams];
        this.pipedInputStreams = new InputStream[numberOfStreams];
        for (int i = 0; i < numberOfStreams; i++) {
            pipedOutputStreams[i] = new PipedOutputStream();
            pipedInputStreams[i] = new FailureAwareInputStream(connect(pipedOutputStreams[i], pipeBufferSize));
        }

        // Set up bnode pipe
        this.bnodeOutputStream = new PipedOutputStream();
        this.bnodeInputStream = new FailureAwareInputStream(connect(bnodeOutputStream, pipeBufferSize));

        this.readerThread = new Thread(this::pumpLines);
        readerThread.setName("ConcurrentInputStream reader");
        readerThread.setDaemon(true);
        readerThread.start();
    }

    /** Creates the read end connected to {@code out} with the given buffer size. */
    private static PipedInputStream connect(PipedOutputStream out, int bufferSize) {
        try {
            return new PipedInputStream(out, bufferSize);
        } catch (IOException e) {
            throw new RuntimeException("Error creating pipes", e);
        }
    }

    /**
     * Body of the reader thread: dispatches every source line to the proper
     * pipe, then closes all pipes so that consumers observe end of stream (or
     * the recorded failure).
     */
    private void pumpLines() {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(source, StandardCharsets.UTF_8))) {
            String line;
            int currentStreamIndex = 0;
            while ((line = reader.readLine()) != null) {
                byte[] data = (line + "\n").getBytes(StandardCharsets.UTF_8);

                if (line.contains("_:")) {
                    // Write to bnodeOutputStream only
                    bnodeOutputStream.write(data);
                } else {
                    // Write to a single stream from pipedOutputStreams in a
                    // round-robin manner
                    pipedOutputStreams[currentStreamIndex].write(data);
                    currentStreamIndex = (currentStreamIndex + 1) % pipedOutputStreams.length;
                }
            }
        } catch (IOException | RuntimeException e) {
            // Record the failure BEFORE the pipes are closed below, so that a
            // consumer reaching end of stream is guaranteed to see it.
            failure = e;
        } finally {
            // Close all output streams to signal EOF
            for (PipedOutputStream out : pipedOutputStreams) {
                closeQuietly(out);
            }
            closeQuietly(bnodeOutputStream);
        }
    }

    /** Closes a pipe's write end; a failure to close leaves nothing to recover. */
    private static void closeQuietly(PipedOutputStream out) {
        try {
            out.close();
        } catch (IOException ignored) {
            // best effort: the pipe is being abandoned anyway
        }
    }

    /**
     * Returns the stream for blank-node lines only.
     */
    public InputStream getBnodeStream() {
        return bnodeInputStream;
    }

    /**
     * Returns the fan-out streams. Each non-blank-node line of the source is
     * delivered to exactly one of them, in round-robin order.
     */
    public InputStream[] getStreams() {
        return pipedInputStreams;
    }

    /**
     * Read end handed to consumers: behaves like the underlying pipe, except
     * that reaching end of stream after the reader thread failed raises an
     * {@link IOException} instead of returning {@code -1}.
     */
    private final class FailureAwareInputStream extends InputStream {

        private final PipedInputStream delegate;

        FailureAwareInputStream(PipedInputStream delegate) {
            this.delegate = delegate;
        }

        @Override
        public int read() throws IOException {
            int value = delegate.read();
            if (value < 0) {
                throwIfFailed();
            }
            return value;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int count = delegate.read(b, off, len);
            if (count < 0) {
                throwIfFailed();
            }
            return count;
        }

        @Override
        public int available() throws IOException {
            return delegate.available();
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }

        private void throwIfFailed() throws IOException {
            Throwable cause = failure;
            if (cause != null) {
                throw new IOException("ConcurrentInputStream reader failed, stream is truncated", cause);
            }
        }
    }
}
Oops, something went wrong.