Skip to content
This repository has been archived by the owner on May 30, 2023. It is now read-only.

Commit

Permalink
More work on language support on all relevant endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
rahulbot committed Nov 4, 2019
1 parent 90cf2c6 commit e6dc7c1
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 9 deletions.
4 changes: 2 additions & 2 deletions common/src/main/java/org/mediacloud/cliff/EntityParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ public ExtractedEntities extractAndResolve(String inputText, boolean manuallyRep
}

@SuppressWarnings("rawtypes")
public ExtractedEntities extractAndResolveFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms) throws Exception {
public ExtractedEntities extractAndResolveFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms, String langauge) throws Exception {
logger.trace("input: {}", (Object[]) sentences);
long startTime = System.nanoTime();
ExtractedEntities extractedEntities = extractor.extractEntitiesFromSentences(sentences,manuallyReplaceDemonyms);
ExtractedEntities extractedEntities = extractor.extractEntitiesFromSentences(sentences,manuallyReplaceDemonyms, langauge);
long extract = System.nanoTime() - startTime;
logger.trace("extracted: {}", extractedEntities.getLocations());
startTime = System.nanoTime();
Expand Down
14 changes: 10 additions & 4 deletions common/src/main/java/org/mediacloud/cliff/ParseManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,12 @@ public static HashMap parseFromText(String text,boolean manuallyReplaceDemonyms,
long endTime = System.currentTimeMillis();
long elapsedMillis = endTime - startTime;
results.put("milliseconds", elapsedMillis);
results.put("language", language);
return results;
}

@SuppressWarnings({ "unchecked", "rawtypes" })
public static HashMap parseFromSentences(String jsonText, boolean manuallyReplaceDemonyms) {
public static HashMap parseFromSentences(String jsonText, boolean manuallyReplaceDemonyms, String language) {
long startTime = System.currentTimeMillis();
HashMap results = null;
if(jsonText.trim().length()==0){
Expand All @@ -136,14 +137,15 @@ public static HashMap parseFromSentences(String jsonText, boolean manuallyReplac
try {
Gson gson = new Gson();
Map[] sentences = gson.fromJson(jsonText, Map[].class);
ExtractedEntities entities = extractAndResolveFromSentences(sentences,manuallyReplaceDemonyms);
ExtractedEntities entities = extractAndResolveFromSentences(sentences,manuallyReplaceDemonyms, language);
results = parseFromEntities(entities);
} catch (Exception e) {
results = getErrorText(e.toString());
}
long endTime = System.currentTimeMillis();
long elapsedMillis = endTime - startTime;
results.put("milliseconds", elapsedMillis);
results.put("language", language);
return results;
}

Expand Down Expand Up @@ -315,9 +317,13 @@ public static ExtractedEntities extractAndResolve(String text,boolean manuallyRe
return extractAndResolve(text, manuallyReplaceDemonyms, EntityExtractor.ENGLISH);
}

/**
 * Two-argument overload of the sentence-based extract-and-resolve entry point.
 * Forwards its arguments to the parser returned by {@code getParserInstance()}.
 *
 * @param sentences sentence maps, passed through unchanged to the parser instance
 * @param manuallyReplaceDemonyms flag passed through unchanged to the parser instance
 * @return the entities returned by the parser instance
 * @throws Exception declared because the underlying parser call declares it
 */
@SuppressWarnings("rawtypes")
public static ExtractedEntities extractAndResolveFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms) throws Exception{
// NOTE(review): this span looks like a rendered diff without +/- markers; the next line is presumably the
// pre-change call (no language argument) — confirm against the real file before relying on it.
return getParserInstance().extractAndResolveFromSentences(sentences, manuallyReplaceDemonyms);
// Presumably the post-change call: same delegation, with EntityExtractor.ENGLISH supplied as the language.
return getParserInstance().extractAndResolveFromSentences(sentences, manuallyReplaceDemonyms, EntityExtractor.ENGLISH);
}

/**
 * Language-aware overload of the sentence-based extract-and-resolve entry point.
 * Thin static wrapper: forwards all three arguments unchanged to the parser
 * returned by {@code getParserInstance()}.
 *
 * @param sentences sentence maps, passed through unchanged to the parser instance
 * @param manuallyReplaceDemonyms flag passed through unchanged to the parser instance
 * @param language language identifier handed to the parser instance (e.g. {@code EntityExtractor.ENGLISH})
 * @return the entities returned by the parser instance
 * @throws Exception declared because the underlying parser call declares it
 */
@SuppressWarnings("rawtypes")
public static ExtractedEntities extractAndResolveFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms, String language) throws Exception{
return getParserInstance().extractAndResolveFromSentences(sentences, manuallyReplaceDemonyms, language);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ public ExtractedEntities extractEntities(String textToParse, boolean manuallyRep
}

@SuppressWarnings("rawtypes")
public ExtractedEntities extractEntitiesFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms){
public ExtractedEntities extractEntitiesFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms, String langauge){
ExtractedEntities e = new ExtractedEntities();
try {
Iterator<EntityExtractor> extractors = loader.iterator();
while (extractors != null && extractors.hasNext()) {
EntityExtractor currentExtractor = extractors.next();
ExtractedEntities e2 = currentExtractor.extractEntitiesFromSentences(sentences, manuallyReplaceDemonyms);
ExtractedEntities e2 = currentExtractor.extractEntitiesFromSentences(sentences, manuallyReplaceDemonyms, langauge);
e.merge(e2);
}
} catch (ServiceConfigurationError serviceError) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import javax.servlet.http.HttpServletResponse;

import org.mediacloud.cliff.ParseManager;
import org.mediacloud.cliff.extractor.EntityExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -44,6 +45,10 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t

HashMap results = null;
String text = request.getParameter("q");
String language = request.getParameter("language");
if (language == null) {
language = EntityExtractor.ENGLISH;
}
String replaceAllDemonymsStr = request.getParameter("replaceAllDemonyms");
boolean manuallyReplaceDemonyms = (replaceAllDemonymsStr==null) ? false : Boolean.parseBoolean(replaceAllDemonymsStr);
logger.debug("q="+text);
Expand All @@ -53,7 +58,7 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t
response.sendError(HttpServletResponse.SC_BAD_REQUEST);
} else {
try {
results = ParseManager.parseFromSentences(text,manuallyReplaceDemonyms);
results = ParseManager.parseFromSentences(text,manuallyReplaceDemonyms, language);
} catch(Exception e){ // try to give the user something useful
logger.error(e.toString());
results = ParseManager.getErrorText(e.toString());
Expand Down

0 comments on commit e6dc7c1

Please sign in to comment.