Skip to content

Commit

Permalink
Resolves "Reviewer feedback for 2.0.0 RC1" (#283)
Browse files Browse the repository at this point in the history
  • Loading branch information
rzo1 authored Nov 20, 2023
1 parent 47deef4 commit 0917eec
Show file tree
Hide file tree
Showing 32 changed files with 126 additions and 184 deletions.
25 changes: 0 additions & 25 deletions dkpro-jwpl-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,6 @@
<include>**/*</include>
</includes>
</resource>
<resource>
<directory>src/it/resources</directory>
<filtering>true</filtering>
<includes>
<include>**/*</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
Expand Down Expand Up @@ -270,24 +263,6 @@
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>create-executable-jar</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<attach>false</attach>
<descriptorRefs>
<descriptor>jar-with-dependencies</descriptor>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Set;

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.util.GraphUtilities;
import org.dkpro.jwpl.api.util.GraphUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.dkpro.jwpl.api.util.CommonUtilities;
import org.dkpro.jwpl.api.util.GraphSerialization;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.util.CommonUtilities;
import org.dkpro.jwpl.util.OS;
import org.dkpro.jwpl.api.util.OS;
import org.jgrapht.GraphPath;
import org.jgrapht.alg.connectivity.ConnectivityInspector;
import org.jgrapht.alg.shortestpath.DijkstraShortestPath;
Expand Down
10 changes: 5 additions & 5 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.hibernate.PageDAO;
import org.dkpro.jwpl.api.sweble.PlainTextConverter;
import org.dkpro.jwpl.util.UnmodifiableArraySet;
import org.dkpro.jwpl.api.util.UnmodifiableArraySet;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageId;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageTitle;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.WtEngineImpl;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.nodes.EngProcessedPage;
import org.hibernate.LockOptions;
import org.hibernate.Session;
import org.hibernate.type.StandardBasicTypes;
import org.sweble.wikitext.engine.PageId;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.WtEngineImpl;
import org.sweble.wikitext.engine.nodes.EngProcessedPage;

import de.fau.cs.osr.ptk.common.AstVisitor;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.util.StringUtils;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.dkpro.jwpl.api.util.StringUtils;
import org.hibernate.Session;
import org.hibernate.query.Query;
import org.slf4j.Logger;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

import javax.xml.parsers.ParserConfigurationException;

import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.engine.utils.LanguageConfigGenerator;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.LanguageConfigGenerator;
import org.xml.sax.SAXException;

import com.neovisionaries.i18n.LanguageCode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.hibernate.WikiHibernateUtil;
import org.dkpro.jwpl.util.distance.LevenshteinStringDistance;
import org.dkpro.jwpl.api.util.distance.LevenshteinStringDistance;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.hibernate.Session;
import org.hibernate.query.NativeQuery;
import org.hibernate.type.StandardBasicTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.config.WikiConfig;

/**
* Provides access to Wikipedia articles and categories.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.hibernate.Session;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
import java.util.List;
import java.util.regex.Pattern;

import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageTitle;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.parser.nodes.WtBold;
import org.sweble.wikitext.parser.nodes.WtExternalLink;
import org.sweble.wikitext.parser.nodes.WtHorizontalRule;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import java.util.LinkedList;
import java.util.List;

import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtTemplate;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Arrays;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;
import java.sql.Connection;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;
import java.util.HashSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

public class OS
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Collection;
import java.util.Iterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Arrays;
import java.util.Collection;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util.distance;
package org.dkpro.jwpl.api.util.distance;

public class LevenshteinStringDistance
implements StringDistance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util.distance;
package org.dkpro.jwpl.api.util.distance;

public interface StringDistance
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

import static org.junit.jupiter.api.Assertions.assertSame;

import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.junit.jupiter.api.Test;
import org.sweble.wikitext.engine.config.WikiConfig;

public class WikiConfigTest
{
Expand Down
8 changes: 8 additions & 0 deletions dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
All classes from this module are taken from third parties.

The classes were published by the Open Source Research Group,
University of Erlangen-Nürnberg, as part of the Sweble project
(https://github.com/sweble/sweble-wikitext)

under the Apache License, Version 2.0
(http://www.apache.org/licenses/LICENSE-2.0)
5 changes: 4 additions & 1 deletion dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@
<pattern>javax.xml.bind</pattern>
<shadedPattern>jakarta.xml.bind</shadedPattern>
</relocation>
<relocation>
<pattern>org.sweble.wikitext.engine</pattern>
<shadedPattern>org.dkpro.jwpl.shade.org.sweble.wikitext.engine</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
Expand All @@ -146,7 +150,6 @@
-->
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.4.0</version>
<executions>
<execution>
<id>workaround-makeItVisibleOnIntellij</id>
Expand Down
80 changes: 68 additions & 12 deletions dkpro-jwpl-revisionmachine/README
Original file line number Diff line number Diff line change
@@ -1,17 +1,73 @@
Known Issues
# Config Examples

Please also see the issues on Google Code
http://code.google.com/p/jwpl/issues/list
## Diff Tool Config

```xml
<config>
<values>
<VALUE_MINIMUM_LONGEST_COMMON_SUBSTRING>12</VALUE_MINIMUM_LONGEST_COMMON_SUBSTRING>
<COUNTER_FULL_REVISION>1000</COUNTER_FULL_REVISION>
</values>
<externals>
<sevenzip>"D:\Programme\Utilities\7-Zip\7z.exe"</sevenzip>
</externals>
<input>
<MODE_SURROGATES>DISCARD_REVISION</MODE_SURROGATES>
<WIKIPEDIA_ENCODING>UTF-8</WIKIPEDIA_ENCODING>
<archive>
<type>SEVENZIP</type>
<path>"D:\simplewiki.7z"</path>
<start>0</start>
</archive>
</input>
<output>
<OUTPUT_MODE>SQL</OUTPUT_MODE>
<PATH>""</PATH>
<LIMIT_SQL_FILE_SIZE>1000000000</LIMIT_SQL_FILE_SIZE>
<MODE_ZIP_COMPRESSION_ENABLED>true</MODE_ZIP_COMPRESSION_ENABLED>
</output>
<cache>
<LIMIT_TASK_SIZE_REVISIONS>5000000</LIMIT_TASK_SIZE_REVISIONS>
<LIMIT_TASK_SIZE_DIFFS>1000000</LIMIT_TASK_SIZE_DIFFS>
<LIMIT_SQLSERVER_MAX_ALLOWED_PACKET>1000000</LIMIT_SQLSERVER_MAX_ALLOWED_PACKET>
</cache>
<logging>
<root_folder>""</root_folder>
<diff_tool>
<level>INFO</level>
</diff_tool>
</logging>
<debug>
<verification_diff>false</verification_diff>
<verification_encoding>false</verification_encoding>
<statistical_output>false</statistical_output>
<debug_output>
<enabled>false</enabled>
<path>""</path>
</debug_output>
</debug>
<filter>
<namespaces>
<!-- White list of namespaces; if nothing here then all namespaces are allowed -->
<ns>0</ns>
<ns>1</ns>
</namespaces>
</filter>
</config>

I)
Handling of surrogate characters
```

There are 4 possible modes of handling UTF8 surrogate characters.
Currently, the only reliable mode is "Discard Revision", in which any revision that contains surrogate characters is discarded.
The other three modes in "org.dkpro.jwpl.revisionmachine.difftool.data.SurrogateModes" have been disabled for now.
The corresponding config section in the config tool has also been hidden (org.dkpro.jwpl.revisionmachine.difftool.config.gui.panels.InputPanel).
The disabled parts are marked with TODO markers.
## Index Generator Config

In order to use the other three surrogate modes, which try to handle surrogate characters differently,
the corresponding code has to be checked. Afterwards, the modes can be re-enabled in the config tool (InputPanel.java) and in the SurrogateModes class.
```
host=localhost
db=wiki_en_20110405_rev
user=student
password=student
output=target
outputDatabase=false
outputDatafile=false
charset=UTF8
buffer=15000
maxAllowedPackets=16760832
```
Loading

0 comments on commit 0917eec

Please sign in to comment.