-
Notifications
You must be signed in to change notification settings - Fork 429
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Java][Datetime] Port BaseDurationExtractor from C# to Java (#955)
* Add DateTime structure * Add DateTime base classes and English duration extractor implementation * Add English duration extractor tests * Fix test skipped status report
- Loading branch information
Showing
60 changed files
with
7,214 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build definition for the Date-Time recognizers module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.microsoft.recognizers.text.datetime</groupId>
  <artifactId>recognizers-text-date-time</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>${project.groupId}:${project.artifactId}</name>
  <description>Microsoft.Recognizers.Text - Date-Time Recognizers</description>
  <url>https://github.com/Microsoft/Recognizers-Text/</url>

  <!-- Parent POM is resolved two directories up from this module. -->
  <parent>
    <groupId>com.microsoft.recognizers.text</groupId>
    <artifactId>recognizers-text-java</artifactId>
    <version>1.0-SNAPSHOT</version>
    <relativePath>../../</relativePath>
  </parent>

  <licenses>
    <license>
      <name>MIT License</name>
      <url>http://www.opensource.org/licenses/mit-license.php</url>
    </license>
  </licenses>

  <developers>
    <developer>
      <name>Microsoft Recognizers-Text</name>
      <email></email>
      <organization>Microsoft</organization>
      <organizationUrl>https://www.microsoft.com/</organizationUrl>
    </developer>
  </developers>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Sibling recognizer modules this module builds on. -->
    <dependency>
      <groupId>com.microsoft.recognizers.text</groupId>
      <artifactId>recognizers-text</artifactId>
      <version>1.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>com.microsoft.recognizers.text.number</groupId>
      <artifactId>recognizers-text-number</artifactId>
      <version>1.0-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>com.microsoft.recognizers.text.numberwithunit</groupId>
      <artifactId>recognizers-text-number-with-unit</artifactId>
      <version>1.0-SNAPSHOT</version>
    </dependency>
    <!-- Third-party libraries. -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>24.1-jre</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile sources and emit bytecode at the Java 8 level. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.7.0</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>

</project>
99 changes: 99 additions & 0 deletions
99
Java/libraries/recognizers-text-date-time/resource-definitions.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
{
  "outputPath": "libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/",
  "configFiles": [
    {
      "input": [ "Base-DateTime" ],
      "output": "BaseDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class BaseDateTime {"
      ],
      "footer": [ "}" ]
    },
    {
      "input": [ "English", "English-DateTime" ],
      "output": "EnglishDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Arrays;",
        "import java.util.List;",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class EnglishDateTime {"
      ],
      "footer": [ "}" ]
    },
    {
      "input": [ "Spanish", "Spanish-DateTime" ],
      "output": "SpanishDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Arrays;",
        "import java.util.List;",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class SpanishDateTime {"
      ],
      "footer": [ "}" ]
    },
    {
      "input": [ "Portuguese", "Portuguese-DateTime" ],
      "output": "PortugueseDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Arrays;",
        "import java.util.List;",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class PortugueseDateTime {"
      ],
      "footer": [ "}" ]
    },
    {
      "input": [ "Chinese", "Chinese-DateTime" ],
      "output": "ChineseDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Arrays;",
        "import java.util.List;",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class ChineseDateTime {"
      ],
      "footer": [ "}" ]
    },
    {
      "input": [ "French", "French-DateTime" ],
      "output": "FrenchDateTime",
      "header": [
        "package com.microsoft.recognizers.text.datetime.resources;",
        "",
        "import java.util.Arrays;",
        "import java.util.List;",
        "import java.util.Map;",
        "",
        "import com.google.common.collect.ImmutableMap;",
        "",
        "public class FrenchDateTime {"
      ],
      "footer": [ "}" ]
    }
  ]
}
117 changes: 117 additions & 0 deletions
117
...izers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/Constants.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package com.microsoft.recognizers.text.datetime; | ||
|
||
import com.microsoft.recognizers.text.datetime.resources.BaseDateTime; | ||
|
||
public class Constants { | ||
|
||
public static final String SYS_DATETIME_DATE = "date"; | ||
public static final String SYS_DATETIME_TIME = "time"; | ||
public static final String SYS_DATETIME_DATEPERIOD = "daterange"; | ||
public static final String SYS_DATETIME_DATETIME = "datetime"; | ||
public static final String SYS_DATETIME_TIMEPERIOD = "timerange"; | ||
public static final String SYS_DATETIME_DATETIMEPERIOD = "datetimerange"; | ||
public static final String SYS_DATETIME_DURATION = "duration"; | ||
public static final String SYS_DATETIME_SET = "set"; | ||
public static final String SYS_DATETIME_DATETIMEALT = "datetimealt"; | ||
public static final String SYS_DATETIME_TIMEZONE = "timezone"; | ||
|
||
// Model Name | ||
public static final String MODEL_DATETIME = "datetime"; | ||
|
||
// Multiple Duration Types | ||
public static final String MultipleDuration_Prefix = "multipleDuration"; | ||
public static final String MultipleDuration_Type = MultipleDuration_Prefix + "Type"; | ||
public static final String MultipleDuration_DateTime = MultipleDuration_Prefix + "DateTime"; | ||
public static final String MultipleDuration_Date = MultipleDuration_Prefix + "Date"; | ||
public static final String MultipleDuration_Time = MultipleDuration_Prefix + "Time"; | ||
|
||
// DateTime Parse | ||
public static final String Resolve = "resolve"; | ||
public static final String ResolveToPast = "resolveToPast"; | ||
public static final String ResolveToFuture = "resolveToFuture"; | ||
|
||
// In the ExtractResult data | ||
public static final String Context = "context"; | ||
public static final String ContextType_RelativePrefix = "relativePrefix"; | ||
public static final String ContextType_RelativeSuffix = "relativeSuffix"; | ||
public static final String ContextType_AmPm = "AmPm"; | ||
public static final String SubType = "subType"; | ||
|
||
// Comment - internal tag used during entity processing, never exposed to users. | ||
// Tags are filtered out in BaseMergedDateTimeParser DateTimeResolution() | ||
public static final String Comment = "Comment"; | ||
// AmPm time representation for time parser | ||
public static final String Comment_AmPm = "ampm"; | ||
// Prefix early/late for time parser | ||
public static final String Comment_Early = "early"; | ||
public static final String Comment_Late = "late"; | ||
// Parse week of date format | ||
public static final String Comment_WeekOf = "WeekOf"; | ||
public static final String Comment_MonthOf = "MonthOf"; | ||
|
||
// Mod Value | ||
// "before" -> To mean "preceding in time". I.e. Does not include the extracted datetime entity in the resolution's ending point. Equivalent to "<" | ||
public static final String BEFORE_MOD = "before"; | ||
|
||
// "after" -> To mean "following in time". I.e. Does not include the extracted datetime entity in the resolution's starting point. Equivalent to ">" | ||
public static final String AFTER_MOD = "after"; | ||
|
||
// "since" -> Same as "after", but including the extracted datetime entity. Equivalent to ">=" | ||
public static final String SINCE_MOD = "since"; | ||
|
||
// "until" -> Same as "before", but including the extracted datetime entity. Equivalent to "<=" | ||
public static final String UNTIL_MOD = "until"; | ||
|
||
public static final String EARLY_MOD = "start"; | ||
public static final String MID_MOD = "mid"; | ||
public static final String LATE_MOD = "end"; | ||
|
||
public static final String MORE_THAN_MOD = "more"; | ||
public static final String LESS_THAN_MOD = "less"; | ||
|
||
public static final String REF_UNDEF_MOD = "ref_undef"; | ||
|
||
// Invalid year | ||
public static final int InvalidYear = Integer.MIN_VALUE; | ||
|
||
public static final int MinYearNum = BaseDateTime.MinYearNum; | ||
public static final int MaxYearNum = BaseDateTime.MaxYearNum; | ||
|
||
public static final int MaxTwoDigitYearFutureNum = BaseDateTime.MaxTwoDigitYearFutureNum; | ||
public static final int MinTwoDigitYearPastNum = BaseDateTime.MinTwoDigitYearPastNum; | ||
|
||
// These are some particular values for timezone recognition | ||
public static final int InvalidOffsetValue = -10000; | ||
public static final String UtcOffsetMinsKey = "utcOffsetMins"; | ||
public static final String TimeZoneText = "timezoneText"; | ||
public static final String TimeZone = "timezone"; | ||
public static final String ResolveTimeZone = "resolveTimeZone"; | ||
public static final int PositiveSign = 1; | ||
public static final int NegativeSign = -1; | ||
|
||
public static final int TrimesterMonthCount = 3; | ||
public static final int SemesterMonthCount = 6; | ||
public static final int WeekDayCount = 7; | ||
public static final int CenturyYearsCount = 100; | ||
|
||
// hours of one half day | ||
public static final int HalfDayHourCount = 12; | ||
// hours of a half mid-day-duration | ||
public static final int HalfMidDayDurationHourCount = 2; | ||
|
||
public static final String DefaultLanguageFallback_MDY = "MDY"; | ||
public static final String DefaultLanguageFallback_DMY = "DMY"; | ||
|
||
// Groups' names for named groups in regexes | ||
public static final String NextGroupName = "next"; | ||
public static final String AmGroupName = "am"; | ||
public static final String PmGroupName = "pm"; | ||
public static final String ImplicitAmGroupName = "iam"; | ||
public static final String ImplicitPmGroupName = "ipm"; | ||
public static final String PrefixGroupName = "prefix"; | ||
public static final String SuffixGroupName = "suffix"; | ||
public static final String DescGroupName = "desc"; | ||
public static final String SecondGroupName = "sec"; | ||
public static final String MinuteGroupName = "min"; | ||
public static final String HourGroupName = "hour"; | ||
} |
24 changes: 24 additions & 0 deletions
24
...text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/DateTimeOptions.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package com.microsoft.recognizers.text.datetime; | ||
|
||
/**
 * Options accepted by the date-time recognizer. Each constant carries an
 * integer value that is a distinct power of two (or zero for {@link #None}),
 * so values can be tested bitwise.
 */
public enum DateTimeOptions {
    None(0),
    SkipFromToMerge(1),
    SplitDateAndTime(2),
    CalendarMode(4),
    ExtendedTypes(8),
    // 8388608 == 1 << 23
    EnablePreview(8388608);

    private final int value;

    DateTimeOptions(int value) {
        this.value = value;
    }

    /**
     * Returns the integer value associated with this option.
     *
     * @return the raw integer value
     */
    public int getValue() {
        return value;
    }

    /**
     * Tests whether every bit set in {@code option} is also set in this option.
     *
     * <p>Because {@link #None} has the value 0, {@code x.match(None)} is
     * {@code true} for every {@code x}.
     *
     * @param option the option whose bits are looked for in this one
     * @return {@code true} if {@code (this & option) == option} on the integer values
     */
    public boolean match(DateTimeOptions option) {
        return (this.value & option.value) == option.value;
    }
}
71 changes: 71 additions & 0 deletions
71
...t-date-time/src/main/java/com/microsoft/recognizers/text/datetime/DateTimeRecognizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
package com.microsoft.recognizers.text.datetime; | ||
|
||
import com.microsoft.recognizers.text.*; | ||
import com.microsoft.recognizers.text.datetime.models.DateTimeModel; | ||
|
||
import java.time.LocalDateTime; | ||
import java.util.List; | ||
import java.util.function.Function; | ||
|
||
public class DateTimeRecognizer extends Recognizer<DateTimeOptions> { | ||
|
||
public DateTimeRecognizer() { | ||
this(null, DateTimeOptions.None, true); | ||
} | ||
|
||
public DateTimeRecognizer(String culture) { | ||
this(culture, DateTimeOptions.None, false); | ||
} | ||
|
||
public DateTimeRecognizer(DateTimeOptions options) { | ||
this(null, options, true); | ||
} | ||
|
||
public DateTimeRecognizer(DateTimeOptions options, boolean lazyInitialization) { | ||
this(null, options, lazyInitialization); | ||
} | ||
|
||
public DateTimeRecognizer(String culture, DateTimeOptions options, boolean lazyInitialization) { | ||
super(culture, options, lazyInitialization); | ||
} | ||
|
||
public DateTimeModel getDateTimeModel() { | ||
return getDateTimeModel(null, true); | ||
} | ||
|
||
public DateTimeModel getDateTimeModel(String culture, boolean fallbackToDefaultCulture) { | ||
return getModel(DateTimeModel.class, culture, fallbackToDefaultCulture); | ||
} | ||
|
||
//region Helper methods for less verbosity | ||
public static List<ModelResult> recognizeDateTime(String query, String culture) { | ||
return recognizeByModel(recognizer -> recognizer.getDateTimeModel(culture, true), query, DateTimeOptions.None, LocalDateTime.now()); | ||
} | ||
|
||
public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options) { | ||
return recognizeByModel(recognizer -> recognizer.getDateTimeModel(culture, true), query, options, LocalDateTime.now()); | ||
} | ||
|
||
public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options, boolean fallbackToDefaultCulture) { | ||
return recognizeByModel(recognizer -> recognizer.getDateTimeModel(culture, fallbackToDefaultCulture), query, options, LocalDateTime.now()); | ||
} | ||
|
||
public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options, boolean fallbackToDefaultCulture, LocalDateTime reference) { | ||
return recognizeByModel(recognizer -> recognizer.getDateTimeModel(culture, fallbackToDefaultCulture), query, options, reference); | ||
} | ||
//endregion | ||
|
||
private static List<ModelResult> recognizeByModel(Function<DateTimeRecognizer, DateTimeModel> getModelFun, String query, DateTimeOptions options, LocalDateTime reference) { | ||
DateTimeRecognizer recognizer = new DateTimeRecognizer(options); | ||
DateTimeModel model = getModelFun.apply(recognizer); | ||
return model.parse(query, reference); | ||
} | ||
|
||
@Override | ||
protected void initializeConfiguration() { | ||
//region English | ||
registerModel(DateTimeModel.class, Culture.English, | ||
(options) -> new DateTimeModel(null, null)); | ||
//endregion | ||
} | ||
} |
Oops, something went wrong.