Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update github actions and JDK 11 #164

Merged
merged 13 commits into from
Jan 6, 2025
68 changes: 68 additions & 0 deletions .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# CI workflow: builds grobid, grobid-ner and entity-fishing with Gradle on
# JDK 11, then checks that the Docker image can be built (it is not pushed).
name: Build unstable

# Runs on every push and can also be started manually.
on: [push, workflow_dispatch]

# All runs of this workflow share the single concurrency group "gradle".
concurrency:
  group: gradle

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # Directory layout produced by the three checkouts:
      #   ./grobid             -> kermitt2/grobid at tag 0.8.1
      #   ./grobid/grobid-ner  -> kermitt2/grobid-ner (default ref)
      #   ./entity-fishing     -> this repository
      - name: Checkout grobid home
        uses: actions/checkout@v4
        with:
          repository: kermitt2/grobid
          ref: 0.8.1
          path: ./grobid
      - name: Checkout grobid-ner
        uses: actions/checkout@v4
        with:
          repository: kermitt2/grobid-ner
          path: ./grobid/grobid-ner
      - name: Checkout entity-fishing
        uses: actions/checkout@v4
        with:
          path: ./entity-fishing
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          # 'temurin' replaces the legacy 'adopt' distribution, whose builds
          # are no longer updated.
          distribution: 'temurin'
          cache: 'gradle'
      # Tests are skipped (-x test) in every Gradle build below.
      - name: Build grobid
        working-directory: grobid
        run: ./gradlew install -x test
      - name: Build grobid-ner
        working-directory: grobid/grobid-ner
        run: ./gradlew install -x test
      - name: Install grobid-ner
        working-directory: grobid/grobid-ner
        run: ./gradlew copyModels
      - name: Build entity-fishing with Gradle
        working-directory: entity-fishing
        run: ./gradlew build -x test


  docker-build:
    # Only attempted once the Gradle build job has succeeded.
    needs: [ build ]
    runs-on: ubuntu-latest

    steps:
      # Remove large preinstalled directories from the runner before building.
      - name: Create more disk space
        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - uses: actions/checkout@v4
      - name: Build and push
        id: docker_build
        uses: mr-smithers-excellent/docker-build-push@v6
        with:
          dockerfile: Dockerfile
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          image: lfoppiano/entity-fishing
          registry: docker.io
          # Build only: the image is not pushed to the registry.
          pushImage: false
          tags: latest-develop
      # NOTE(review): confirm that docker-build-push exposes a `digest` output;
      # if it does not, this step prints an empty line.
      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
30 changes: 17 additions & 13 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ version = '0.0.6'

description = """entity recognition and disambiguation against Wikidata and Wikipedia in a raw text, partially-annotated text segment, PDF or weighted term vector"""

sourceCompatibility = 1.8
targetCompatibility = 1.8
sourceCompatibility = 1.11
targetCompatibility = 1.11

import org.apache.tools.ant.taskdefs.condition.Os

Expand Down Expand Up @@ -65,6 +65,10 @@ dependencies {
exclude(group: 'ch.qos.logback', module: 'logback-classic')
}

implementation('ch.qos.logback:logback-classic:1.2.3'){
exclude(module: 'com.google.guava:guava')
}

implementation 'black.ninia:jep:4.0.2'
implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
implementation "joda-time:joda-time:2.9.9"
Expand All @@ -76,15 +80,15 @@ dependencies {
implementation group: 'org.wipo.analysers', name: 'wipo-analysers', version: '0.0.1'

// Apache commons
implementation 'org.apache.commons:commons-collections4:4.1'
implementation 'org.apache.commons:commons-collections4:4.3'
implementation 'org.apache.commons:commons-lang3:3.6'
implementation 'commons-logging:commons-logging:1.2'
implementation 'commons-io:commons-io:2.7'
implementation 'commons-pool:commons-pool:1.6'
implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.13'
implementation group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.13'
implementation group: 'org.apache.commons', name: 'commons-text', version: '1.1'
implementation group: 'com.google.guava', name: 'guava', version: '29.0-jre'
implementation "com.google.guava:guava:31.0.1-jre"

// json and yaml
implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.10.1'
Expand Down Expand Up @@ -152,7 +156,7 @@ configurations.all {
force 'xml-apis:xml-apis:1.4.01'
}

exclude group: "ch.qos.logback", module: "logback-classic"
// exclude group: "ch.qos.logback", module: "logback-classic"
exclude group: 'org.slf4j', module: "slf4j-log4j12"
exclude group: 'org.slf4j', module: "slf4j-jdk14"
exclude group: 'log4j', module: "log4j"
Expand Down Expand Up @@ -211,7 +215,7 @@ task(train_corpus, dependsOn: 'classes', type: JavaExec, group: 'training') {
args getArg('corpus', ''), getArg('lang', 'en')
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
}
Expand All @@ -225,7 +229,7 @@ task(evaluation, dependsOn: 'classes', type: JavaExec, group: 'evaluation') {
classpath = sourceSets.main.runtimeClasspath
args getArg('corpus', '')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -239,7 +243,7 @@ task(annotatedDataGeneration, dependsOn: 'classes', type: JavaExec, group: 'trai
classpath = sourceSets.main.runtimeClasspath
args getArg('corpus', '')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -254,7 +258,7 @@ task(generate_entity_description, dependsOn: 'classes', type: JavaExec, group: '
classpath = sourceSets.main.runtimeClasspath
args 'data/embeddings/', getArg('lang', 'en')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -269,7 +273,7 @@ task(quantize_word_embeddings, dependsOn: 'classes', type: JavaExec, group: 'emb
classpath = sourceSets.main.runtimeClasspath
args '-i', getArg('i', 'word.embeddings.vec'), '-o', getArg('o', 'word.embeddings.quantized'), '-error', getArg('e', '0.01'), '-hashheader'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -282,7 +286,7 @@ task(generate_entity_embeddings, dependsOn: 'classes', type: JavaExec, group: 'e
classpath = sourceSets.main.runtimeClasspath
args '-in', getArg('in', 'entity.description'), '-v', getArg('v', 'word.embeddings.quantized'), '-out', getArg('out', 'entity.embeddings.vec'), '-n', getArg('n', '8')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -295,7 +299,7 @@ task(quantize_entity_embeddings, dependsOn: 'classes', type: JavaExec, group: 'e
classpath = sourceSets.main.runtimeClasspath
args '-i', getArg('i', 'entity.embeddings.vec'), '-o', getArg('o', 'entity.embeddings.quantized'), '-error', getArg('e', '0.01'), '-hashheader'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '--Xms2g', '-Xmx8g'
}
Expand All @@ -308,7 +312,7 @@ application {

run {
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
}

args = ['server', 'data/config/service.yaml']
Expand Down
8 changes: 5 additions & 3 deletions doc/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ Install, build, run, and monitor
Install, build, and run
***********************

*entity-fishing* requires JDK 1.8 or higher. It supports Linux-64.
*entity-fishing* requires JDK 11 or higher.
The official supported architecture/OS is Linux-64.

Mac OS environments should work fine, but it is *officially* not supported.
Please use a Linux-64 environment for any production works. Below, we make available the up-to-date and full binary index data for Linux-64 architecture.
Mac OS is not officially supported. Mac OS on Intel should nevertheless work fine, but Mac OS on ARM does not work.
Please use a Linux-64 environment for any production work.
Below, we make available the up-to-date and full binary index data for Linux-64 architecture.

Running the service requires at least 3GB of RAM for processing text inputs, but more RAM will be exploited if available for speeding up access to the compiled Wikidata and Wikipedia data (including Wikidata statements associated with entities) and for enabling high-rate parallel processing. In case PDFs are processed, a minimum of 8GB is required due to additional PDF parsing and structuring requirements. For parallel processing of PDFs exploiting multithreading (e.g. 10 parallel threads), 16GB is recommended.

Expand Down
Loading