Skip to content

Commit

Permalink
Merge pull request #4 from miaaaooow/master
Browse files Browse the repository at this point in the history
Sloth.Works Hackathon - GATE Experiment
  • Loading branch information
Bozhidar Bozhanov authored Jan 22, 2018
2 parents e15e97d + 14f6e96 commit 9a8322c
Show file tree
Hide file tree
Showing 56 changed files with 4,772 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
**.idea/
*.iml
**/target/**

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ OpenLex цели създаването на машинно четима вер
- [RDF for Legislation, Gov UK](https://www.legislation.gov.uk/developer/formats/rdf)
- [Open Legislation docs](http://openlegislation.readthedocs.io/en/latest/)
- [LEOS, European Commission](https://ec.europa.eu/isa2/solutions/leos_en)

- [GATE](https://gate.ac.uk/overview.html)
3 changes: 3 additions & 0 deletions sloth.works.hakathon/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**.idea/
*.iml

6 changes: 6 additions & 0 deletions sloth.works.hakathon/GateTasks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Задачи:
- В закон(plain text) разпознаване на закони, алинеи и членове
- LegalXML версия на закона
- В поправки от ДВ - разпознаване на поправка, шаблон, за кой закон е
- Генериране на diff

24 changes: 24 additions & 0 deletions sloth.works.hakathon/LawStructure.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Структура на закон
==================

закон law
глава chapter
раздел section
член article
алинея subparagraph
точка point
буква letter
изречение sentence

изменения
допълнителни разпоредби ~ раздел
преходни разпоредби
заключителни разпоредби
параграф

amendments
additional provisions ~ section
transitional provisions
final provisions
paragraph

35 changes: 35 additions & 0 deletions sloth.works.hakathon/RuleTypes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Типове промени

§ - членове в Допълнителни разпоредби
§ - членове в Заключителни разпоредби

§ или само число - дефинира предстоящи промени



Създава се ... - add
....се cъздава - add
В - in
след думата - after word


думите ... се заменят ... - substitute words ... with ...

в основния текст думите ... се заменят ... - substitute words ... with ...


... се изменя така - is changed as follows
....се изменя със - change with
се добавя.... - add
накрая се добавя - add at the end

се добавя запетая - ,
се създава изречение второ -
.... се отменя - ... delete
... се заличават - ... delete
Заглавието се изменя така...
Досегашният текст става ал.1. Създава се ал. 2


Алинея 1
ал. 1
19 changes: 19 additions & 0 deletions sloth.works.hakathon/gate-experiment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the GATE experiment: a single module depending on gate-core. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.openlex.gate</groupId>
    <artifactId>gate-experiment</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Fix the source encoding so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!--
          The sources use the diamond operator, so they need at least Java 7;
          without an explicit level, older maven-compiler-plugin defaults reject them.
        -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>uk.ac.gate</groupId>
            <artifactId>gate-core</artifactId>
            <version>8.4</version>
        </dependency>
    </dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package org.openlex.experiments.io;

import gate.*;
import gate.util.GateException;
import gate.util.InvalidOffsetException;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

/**
 * Created by mateva on 21.01.18.
 *
 * <p>Experiment that reads two GATE serial data stores ("amends" and "laws"),
 * collects every {@code RuleSubstitute} annotation from the amendment documents
 * and applies the described word substitutions to the matching
 * {@code AlineaContent} annotations of each law document. For every law the
 * unmodified text is written under {@code original/} and the amended text under
 * {@code results/}.
 */
public class AnnotatedCorpusReader {
    private static final String PATH_TO_GATE = "/home/mateva/Installs/GATE";
    private static final String PATH_TO_GATE_PLUGINS = "/home/mateva/Installs/GATE/plugins";

    private static final String PATH_TO_RESOURCES = "/home/mateva/OpenLex/M/OpenLex/sloth.works.hakathon/gate-experiment/src/main/resources/";
    private static final String PATH_TO_FILE_RESOURCES = "file://" + PATH_TO_RESOURCES;
    private static final String PATH_TO_RESULT_OUTPUT = PATH_TO_RESOURCES + "results/";
    private static final String PATH_TO_ORIGINAL_OUTPUT = PATH_TO_RESOURCES + "original/";

    private static final String DATA_STORE_CLASS = "gate.persist.SerialDataStore";
    private static final String DOC_IMPL_CLASS = "gate.corpora.DocumentImpl";

    /**
     * Runs the whole experiment: initialises GATE, opens both data stores,
     * gathers the substitutions and writes the original and amended law texts.
     * Both data stores are closed again when the run finishes or fails.
     */
    private void read() {
        if (!setupAndStartGate()) {
            // Nothing below can work without an initialised GATE.
            return;
        }

        DataStore annotatedLaws = null;
        DataStore annotatedAmendments = null;
        try {
            annotatedLaws = Factory.openDataStore(DATA_STORE_CLASS, PATH_TO_FILE_RESOURCES + "laws");
            annotatedAmendments = Factory.openDataStore(DATA_STORE_CLASS, PATH_TO_FILE_RESOURCES + "amends");

            List<Diff> diffs = collectSubstitutions(annotatedAmendments);
            applySubstitutions(annotatedLaws, diffs);
        } catch (GateException e) {
            handleException(e);
        } finally {
            closeQuietly(annotatedAmendments);
            closeQuietly(annotatedLaws);
        }
    }

    /**
     * Builds one {@link Diff} per {@code RuleSubstitute} annotation found in the
     * documents of the given data store.
     *
     * @param amendments opened data store holding the annotated amendment documents
     * @return the collected substitutions, in the order they were encountered
     * @throws GateException if the data store cannot be listed or a document cannot be released
     */
    private List<Diff> collectSubstitutions(DataStore amendments) throws GateException {
        List<Diff> diffs = new ArrayList<>();

        for (Object id : amendments.getLrIds(DOC_IMPL_CLASS)) {
            Document d = readDocumentFrom(amendments, id);
            if (d == null) {
                // Loading failed and was already reported; skip this document.
                continue;
            }
            try {
                for (Annotation a : d.getAnnotations().get("RuleSubstitute")) {
                    FeatureMap map = a.getFeatures();
                    // Feature values are plain Objects; convert instead of casting
                    // so a non-String value does not abort the run.
                    String alNum = asString(map.get("alinea_number"));
                    String articleNum = asString(map.get("article_number"));
                    String what = asString(map.get("what"));
                    String withWhat = asString(map.get("withWhat"));

                    // NOTE(review): the Diff keeps a reference to a document that is
                    // deleted right after this loop; nothing in this class reads it back.
                    diffs.add(new Diff(alNum, articleNum, what, withWhat, d));

                    System.out.println(alNum);
                    System.out.println(articleNum);
                    System.out.println(what);
                    System.out.println(withWhat);
                }
            } finally {
                Factory.deleteResource(d);
            }
        }
        return diffs;
    }

    /**
     * Writes, for every law document in the data store, its original text and
     * the text with all matching substitutions applied.
     *
     * @param laws  opened data store holding the annotated law documents
     * @param diffs substitutions to apply
     * @throws GateException if the data store cannot be listed or a document cannot be released
     */
    private void applySubstitutions(DataStore laws, List<Diff> diffs) throws GateException {
        for (Object id : laws.getLrIds(DOC_IMPL_CLASS)) {
            Document d = readDocumentFrom(laws, id);
            if (d == null) {
                continue;
            }
            try {
                writeOriginalFile(d);
                writeContentTOFileAtPath(amend(d, diffs), PATH_TO_RESULT_OUTPUT + d.getName());
            } finally {
                Factory.deleteResource(d);
            }
        }
    }

    /**
     * Returns the text of the law with every applicable substitution applied.
     * A substitution is applied only to alineas whose {@code number} and
     * {@code article_number} features equal the ones recorded in the diff, and
     * several diffs targeting the same alinea are applied one after another.
     *
     * @param law   the annotated law document
     * @param diffs substitutions to consider
     * @return the amended document text
     */
    private String amend(Document law, List<Diff> diffs) {
        String content = law.getContent().toString();

        // Original alinea text -> amended alinea text, in encounter order.
        Map<String, String> changed = new LinkedHashMap<>();
        for (Annotation alineaContent : law.getAnnotations().get("AlineaContent")) {
            String original = getPartOfDocument(law, alineaContent.getStartNode().getOffset(),
                    alineaContent.getEndNode().getOffset());
            if (original == null) {
                continue;
            }

            String updated = original;
            for (Diff diff : diffs) {
                if (appliesTo(diff, alineaContent.getFeatures())) {
                    System.out.println("Match!");
                    // Literal replacement: the amendment text is not a regular expression.
                    updated = updated.replace(diff.getWhat(), diff.getWithWhat());
                }
            }
            if (!updated.equals(original)) {
                changed.put(original, updated);
            }
        }

        // NOTE(review): replacement is text based, so an alinea whose exact text
        // occurs more than once in the law is replaced at every occurrence.
        for (Map.Entry<String, String> entry : changed.entrySet()) {
            content = content.replace(entry.getKey(), entry.getValue());
        }
        return content;
    }

    /**
     * Tells whether the diff is complete (non-empty search text, replacement
     * and both numbers present) and targets the alinea described by the features.
     */
    private static boolean appliesTo(Diff diff, FeatureMap features) {
        if (features == null) {
            return false;
        }
        String what = diff.getWhat();
        if (what == null || what.isEmpty() || diff.getWithWhat() == null) {
            return false;
        }
        String alNum = diff.getAlNum();
        String articleNum = diff.getArticleNum();
        return alNum != null
                && articleNum != null
                && alNum.equals(asString(features.get("number")))
                && articleNum.equals(asString(features.get("article_number")));
    }

    /** Returns {@code value.toString()}, or {@code null} when the value is absent. */
    private static String asString(Object value) {
        return value == null ? null : value.toString();
    }

    /**
     * Extracts the document text between the two offsets.
     *
     * @return the text, or {@code null} if the offsets are invalid (the error is reported)
     */
    private String getPartOfDocument(Document docs, Long startOffSet, Long endOffset) {
        try {
            return docs.getContent().getContent(startOffSet, endOffset).toString();
        } catch (InvalidOffsetException e) {
            handleException(e);
        }
        return null;
    }

    /** Writes the unmodified document text to the "original" output directory. */
    private void writeOriginalFile(Document document) {
        String originalContent = document.getContent().toString();
        String name = document.getName();
        writeContentTOFileAtPath(originalContent, PATH_TO_ORIGINAL_OUTPUT + name);
    }

    /**
     * Writes the content to the given file as UTF-8, creating missing parent
     * directories and overwriting an existing file. I/O failures are reported,
     * not thrown.
     */
    private void writeContentTOFileAtPath(String content, String path) {
        Path target = Paths.get(path);
        try {
            Path parent = target.getParent();
            if (parent != null) {
                Files.createDirectories(parent);
            }
            // try-with-resources closes the writer even when write() fails.
            try (BufferedWriter writer = Files.newBufferedWriter(target, StandardCharsets.UTF_8)) {
                writer.write(content);
            }
        } catch (IOException e) {
            handleException(e);
        }
    }

    /**
     * Points GATE at the configured home and plugin directories (unless they
     * are already set) and initialises it.
     *
     * @return {@code true} if GATE was initialised, {@code false} if initialisation failed
     */
    private boolean setupAndStartGate() {
        if (Gate.getGateHome() == null) {
            Gate.setGateHome(new File(PATH_TO_GATE));
        }
        if (Gate.getPluginsHome() == null) {
            Gate.setPluginsHome(new File(PATH_TO_GATE_PLUGINS));
        }

        try {
            Gate.init();
            return true;
        } catch (GateException ge) {
            handleException(ge);
            return false;
        }
    }

    /**
     * Loads the document with the given id from the data store.
     *
     * @return the document, or {@code null} if it could not be loaded (the error is reported)
     */
    private Document readDocumentFrom(DataStore ds, Object id) {
        try {
            return (Document) Factory.createResource(DOC_IMPL_CLASS,
                    gate.Utils.featureMap(DataStore.DATASTORE_FEATURE_NAME, ds,
                            DataStore.LR_ID_FEATURE_NAME, id));
        } catch (Exception e) {
            // Deliberately broad: one unreadable document must not stop the run.
            handleException(e);
        }
        return null;
    }

    /** Closes the data store if it was opened; a failure to close is only reported. */
    private void closeQuietly(DataStore ds) {
        if (ds == null) {
            return;
        }
        try {
            ds.close();
        } catch (GateException e) {
            handleException(e);
        }
    }

    /** Reports a failure on standard error. */
    private void handleException(Exception e) {
        System.err.println(e);
    }

    public static void main(String[] args) {
        AnnotatedCorpusReader reader = new AnnotatedCorpusReader();
        reader.read();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.openlex.experiments.io;

import gate.Document;

/**
 * Created by mateva on 21.01.18.
 *
 * <p>Mutable holder for one substitution: the alinea and article numbers it
 * refers to, the text to look for, the text to put in its place, and the
 * document it was created from.
 */
public class Diff {
    // Fields are package-private, exactly as before.
    String alNum;
    String articleNum;
    String what;
    String withWhat;
    Document doc;

    /**
     * @param alNum      alinea number
     * @param articleNum article number
     * @param what       text to be replaced
     * @param withWhat   replacement text
     * @param doc        document associated with this substitution
     */
    public Diff(String alNum, String articleNum, String what, String withWhat, Document doc) {
        this.doc = doc;
        this.alNum = alNum;
        this.articleNum = articleNum;
        this.what = what;
        this.withWhat = withWhat;
    }

    // ---- read access ----

    /** @return the document passed to the constructor */
    public Document getDoc() {
        return this.doc;
    }

    /** @return the alinea number */
    public String getAlNum() {
        return this.alNum;
    }

    /** @return the article number */
    public String getArticleNum() {
        return this.articleNum;
    }

    /** @return the text to be replaced */
    public String getWhat() {
        return this.what;
    }

    /** @return the replacement text */
    public String getWithWhat() {
        return this.withWhat;
    }

    // ---- write access (the document has no setter) ----

    /** @param alNum new alinea number */
    public void setAlNum(String alNum) {
        this.alNum = alNum;
    }

    /** @param articleNum new article number */
    public void setArticleNum(String articleNum) {
        this.articleNum = articleNum;
    }

    /** @param what new text to be replaced */
    public void setWhat(String what) {
        this.what = what;
    }

    /** @param withWhat new replacement text */
    public void setWithWhat(String withWhat) {
        this.withWhat = withWhat;
    }

}
Loading

0 comments on commit 9a8322c

Please sign in to comment.