Skip to content

Commit

Permalink
[Java][Datetime] Port BaseDurationExtractor from C# to Java (#955)
Browse files Browse the repository at this point in the history
* Add DateTime structure

* Add DateTime base classes and English duration extractor implementation

* Add English duration extractor tests

* Fix test skipped status report
  • Loading branch information
JuanAr authored and tellarin committed Nov 12, 2018
1 parent 08eac17 commit ca137ac
Show file tree
Hide file tree
Showing 60 changed files with 7,214 additions and 89 deletions.
80 changes: 80 additions & 0 deletions Java/libraries/recognizers-text-date-time/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the recognizers-text-date-time module.
  Produces a jar artifact and compiles with Java 1.8 source/target levels.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Coordinates of this module -->
<groupId>com.microsoft.recognizers.text.datetime</groupId>
<artifactId>recognizers-text-date-time</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>

<!-- Display name is composed from the group id and artifact id declared above -->
<name>${project.groupId}:${project.artifactId}</name>
<description>Microsoft.Recognizers.Text - Date-Time Recognizers</description>
<url>https://github.com/Microsoft/Recognizers-Text/</url>

<!-- Parent POM (recognizers-text-java) is resolved two directories above this module -->
<parent>
<groupId>com.microsoft.recognizers.text</groupId>
<artifactId>recognizers-text-java</artifactId>
<version>1.0-SNAPSHOT</version>
<relativePath>../../</relativePath>
</parent>

<licenses>
<license>
<name>MIT License</name>
<url>http://www.opensource.org/licenses/mit-license.php</url>
</license>
</licenses>

<developers>
<developer>
<name>Microsoft Recognizers-Text</name>
<!-- The email element is intentionally left empty in this descriptor -->
<email></email>
<organization>Microsoft</organization>
<organizationUrl>https://www.microsoft.com/</organizationUrl>
</developer>
</developers>

<properties>
<!-- Source files are read as UTF-8 -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<!--
  Dependencies: three other Recognizers-Text artifacts (core, number,
  number-with-unit), all at 1.0-SNAPSHOT, plus Google Guava.
-->
<dependencies>
<dependency>
<groupId>com.microsoft.recognizers.text</groupId>
<artifactId>recognizers-text</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.microsoft.recognizers.text.number</groupId>
<artifactId>recognizers-text-number</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.microsoft.recognizers.text.numberwithunit</groupId>
<artifactId>recognizers-text-number-with-unit</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>24.1-jre</version>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Pin the compiler plugin version and the Java language level (1.8) -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"outputPath": "libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/",
"configFiles": [
{
"input": [ "Base-DateTime" ],
"output": "BaseDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class BaseDateTime {"
],
"footer": [ "}" ]
},
{
"input": [ "English", "English-DateTime" ],
"output": "EnglishDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Arrays;",
"import java.util.List;",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class EnglishDateTime {"
],
"footer": [ "}" ]
},
{
"input": [ "Spanish", "Spanish-DateTime" ],
"output": "SpanishDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Arrays;",
"import java.util.List;",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class SpanishDateTime {"
],
"footer": [ "}" ]
},
{
"input": [ "Portuguese", "Portuguese-DateTime" ],
"output": "PortugueseDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Arrays;",
"import java.util.List;",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class PortugueseDateTime {"
],
"footer": [ "}" ]
},
{
"input": [ "Chinese", "Chinese-DateTime" ],
"output": "ChineseDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Arrays;",
"import java.util.List;",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class ChineseDateTime {"
],
"footer": [ "}" ]
},
{
"input": [ "French", "French-DateTime" ],
"output": "FrenchDateTime",
"header": [
"package com.microsoft.recognizers.text.datetime.resources;",
"",
"import java.util.Arrays;",
"import java.util.List;",
"import java.util.Map;",
"",
"import com.google.common.collect.ImmutableMap;",
"",
"public class FrenchDateTime {"
],
"footer": [ "}" ]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package com.microsoft.recognizers.text.datetime;

import com.microsoft.recognizers.text.datetime.resources.BaseDateTime;

/**
 * Constant table for the date-time package: type names, modifier values,
 * dictionary keys, numeric limits and regex group names.
 *
 * <p>The class holds only {@code public static final} fields. The year-related
 * limits are copied from the {@link BaseDateTime} resource class.
 */
public class Constants {

// Type names for the date-time entity kinds
public static final String SYS_DATETIME_DATE = "date";
public static final String SYS_DATETIME_TIME = "time";
public static final String SYS_DATETIME_DATEPERIOD = "daterange";
public static final String SYS_DATETIME_DATETIME = "datetime";
public static final String SYS_DATETIME_TIMEPERIOD = "timerange";
public static final String SYS_DATETIME_DATETIMEPERIOD = "datetimerange";
public static final String SYS_DATETIME_DURATION = "duration";
public static final String SYS_DATETIME_SET = "set";
public static final String SYS_DATETIME_DATETIMEALT = "datetimealt";
public static final String SYS_DATETIME_TIMEZONE = "timezone";

// Model Name
public static final String MODEL_DATETIME = "datetime";

// Multiple Duration Types
// Every name below is MultipleDuration_Prefix followed by a suffix.
public static final String MultipleDuration_Prefix = "multipleDuration";
public static final String MultipleDuration_Type = MultipleDuration_Prefix + "Type";
public static final String MultipleDuration_DateTime = MultipleDuration_Prefix + "DateTime";
public static final String MultipleDuration_Date = MultipleDuration_Prefix + "Date";
public static final String MultipleDuration_Time = MultipleDuration_Prefix + "Time";

// DateTime Parse
public static final String Resolve = "resolve";
public static final String ResolveToPast = "resolveToPast";
public static final String ResolveToFuture = "resolveToFuture";

// In the ExtractResult data
public static final String Context = "context";
public static final String ContextType_RelativePrefix = "relativePrefix";
public static final String ContextType_RelativeSuffix = "relativeSuffix";
public static final String ContextType_AmPm = "AmPm";
public static final String SubType = "subType";

// Comment - internal tag used during entity processing, never exposed to users.
// Tags are filtered out in BaseMergedDateTimeParser DateTimeResolution()
public static final String Comment = "Comment";
// AmPm time representation for time parser
public static final String Comment_AmPm = "ampm";
// Prefix early/late for time parser
public static final String Comment_Early = "early";
public static final String Comment_Late = "late";
// Parse week of date format
public static final String Comment_WeekOf = "WeekOf";
public static final String Comment_MonthOf = "MonthOf";

// Mod Value
// "before" -> To mean "preceding in time". I.e. Does not include the extracted datetime entity in the resolution's ending point. Equivalent to "<"
public static final String BEFORE_MOD = "before";

// "after" -> To mean "following in time". I.e. Does not include the extracted datetime entity in the resolution's starting point. Equivalent to ">"
public static final String AFTER_MOD = "after";

// "since" -> Same as "after", but including the extracted datetime entity. Equivalent to ">="
public static final String SINCE_MOD = "since";

// "until" -> Same as "before", but including the extracted datetime entity. Equivalent to "<="
public static final String UNTIL_MOD = "until";

// Note: the EARLY and LATE modifiers carry the values "start" and "end", not "early"/"late".
public static final String EARLY_MOD = "start";
public static final String MID_MOD = "mid";
public static final String LATE_MOD = "end";

public static final String MORE_THAN_MOD = "more";
public static final String LESS_THAN_MOD = "less";

public static final String REF_UNDEF_MOD = "ref_undef";

// Invalid year (sentinel value: Integer.MIN_VALUE)
public static final int InvalidYear = Integer.MIN_VALUE;

// Year limits, copied from the BaseDateTime resource class
public static final int MinYearNum = BaseDateTime.MinYearNum;
public static final int MaxYearNum = BaseDateTime.MaxYearNum;

// Two-digit-year limits, also copied from BaseDateTime
public static final int MaxTwoDigitYearFutureNum = BaseDateTime.MaxTwoDigitYearFutureNum;
public static final int MinTwoDigitYearPastNum = BaseDateTime.MinTwoDigitYearPastNum;

// These are some particular values for timezone recognition
public static final int InvalidOffsetValue = -10000;
public static final String UtcOffsetMinsKey = "utcOffsetMins";
public static final String TimeZoneText = "timezoneText";
public static final String TimeZone = "timezone";
public static final String ResolveTimeZone = "resolveTimeZone";
public static final int PositiveSign = 1;
public static final int NegativeSign = -1;

// Calendar unit sizes: months per trimester/semester, days per week, years per century
public static final int TrimesterMonthCount = 3;
public static final int SemesterMonthCount = 6;
public static final int WeekDayCount = 7;
public static final int CenturyYearsCount = 100;

// hours of one half day
public static final int HalfDayHourCount = 12;
// hours of a half mid-day-duration
public static final int HalfMidDayDurationHourCount = 2;

// Fallback date-order identifiers: month-day-year and day-month-year
public static final String DefaultLanguageFallback_MDY = "MDY";
public static final String DefaultLanguageFallback_DMY = "DMY";

// Groups' names for named groups in regexes
public static final String NextGroupName = "next";
public static final String AmGroupName = "am";
public static final String PmGroupName = "pm";
public static final String ImplicitAmGroupName = "iam";
public static final String ImplicitPmGroupName = "ipm";
public static final String PrefixGroupName = "prefix";
public static final String SuffixGroupName = "suffix";
public static final String DescGroupName = "desc";
public static final String SecondGroupName = "sec";
public static final String MinuteGroupName = "min";
public static final String HourGroupName = "hour";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.microsoft.recognizers.text.datetime;

/**
 * Option flags for date-time recognition. Each constant carries an integer
 * value; apart from {@code None} (zero) every value is a single bit.
 */
public enum DateTimeOptions {
    None(0),
    SkipFromToMerge(1 << 0),
    SplitDateAndTime(1 << 1),
    CalendarMode(1 << 2),
    ExtendedTypes(1 << 3),
    EnablePreview(1 << 23);

    /** Integer value associated with this option. */
    private final int flag;

    DateTimeOptions(int flag) {
        this.flag = flag;
    }

    /**
     * Returns the integer value associated with this option.
     *
     * @return the option's integer value
     */
    public int getValue() {
        return flag;
    }

    /**
     * Tests whether every bit set in {@code option}'s value is also set in
     * this option's value. Because {@code None} has the value zero, any
     * option matches {@code None}.
     *
     * @param option the option whose bits are looked for
     * @return {@code true} when all bits of {@code option} are present here
     */
    public boolean match(DateTimeOptions option) {
        final int mask = option.getValue();
        return (getValue() & mask) == mask;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package com.microsoft.recognizers.text.datetime;

import com.microsoft.recognizers.text.*;
import com.microsoft.recognizers.text.datetime.models.DateTimeModel;

import java.time.LocalDateTime;
import java.util.List;
import java.util.function.Function;

/**
 * Recognizer for date-time models, parameterized by {@link DateTimeOptions}.
 *
 * <p>Besides the instance API, a set of static {@code recognizeDateTime}
 * helpers builds a recognizer, fetches its {@link DateTimeModel} and parses a
 * query in a single call.
 */
public class DateTimeRecognizer extends Recognizer<DateTimeOptions> {

    /** Creates a recognizer with no culture, no options and lazy initialization. */
    public DateTimeRecognizer() {
        this(null, DateTimeOptions.None, true);
    }

    /**
     * Creates a recognizer for the given culture with no options and
     * lazy initialization turned off.
     *
     * @param culture culture passed to the base recognizer
     */
    public DateTimeRecognizer(String culture) {
        this(culture, DateTimeOptions.None, false);
    }

    /**
     * Creates a recognizer with no culture, the given options and lazy initialization.
     *
     * @param options recognizer options
     */
    public DateTimeRecognizer(DateTimeOptions options) {
        this(options, true);
    }

    /**
     * Creates a recognizer with no culture.
     *
     * @param options            recognizer options
     * @param lazyInitialization forwarded unchanged to the base recognizer
     */
    public DateTimeRecognizer(DateTimeOptions options, boolean lazyInitialization) {
        this(null, options, lazyInitialization);
    }

    /**
     * Full constructor; all arguments are forwarded to the base recognizer.
     *
     * @param culture            culture passed to the base recognizer (may be {@code null})
     * @param options            recognizer options
     * @param lazyInitialization forwarded unchanged to the base recognizer
     */
    public DateTimeRecognizer(String culture, DateTimeOptions options, boolean lazyInitialization) {
        super(culture, options, lazyInitialization);
    }

    /**
     * Fetches the date-time model using a {@code null} culture with fallback enabled.
     *
     * @return the resolved {@link DateTimeModel}
     */
    public DateTimeModel getDateTimeModel() {
        return getDateTimeModel(null, true);
    }

    /**
     * Fetches the date-time model for a culture.
     *
     * @param culture                  culture to look up
     * @param fallbackToDefaultCulture forwarded to the model lookup
     * @return the resolved {@link DateTimeModel}
     */
    public DateTimeModel getDateTimeModel(String culture, boolean fallbackToDefaultCulture) {
        return getModel(DateTimeModel.class, culture, fallbackToDefaultCulture);
    }

    // ---- Static convenience helpers: each overload fills in one more default ----

    /** Parses {@code query} with no options, culture fallback enabled and the current time as reference. */
    public static List<ModelResult> recognizeDateTime(String query, String culture) {
        return recognizeDateTime(query, culture, DateTimeOptions.None);
    }

    /** Parses {@code query} with culture fallback enabled and the current time as reference. */
    public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options) {
        return recognizeDateTime(query, culture, options, true);
    }

    /** Parses {@code query} using the current time ({@link LocalDateTime#now()}) as reference. */
    public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options, boolean fallbackToDefaultCulture) {
        return recognizeDateTime(query, culture, options, fallbackToDefaultCulture, LocalDateTime.now());
    }

    /**
     * Parses {@code query} against the date-time model of the given culture.
     *
     * @param query                    text to parse
     * @param culture                  culture used to look up the model
     * @param options                  options for the recognizer created for this call
     * @param fallbackToDefaultCulture forwarded to the model lookup
     * @param reference                reference date-time handed to the model's parse call
     * @return the results produced by the model
     */
    public static List<ModelResult> recognizeDateTime(String query, String culture, DateTimeOptions options, boolean fallbackToDefaultCulture, LocalDateTime reference) {
        Function<DateTimeRecognizer, DateTimeModel> modelSelector =
            instance -> instance.getDateTimeModel(culture, fallbackToDefaultCulture);
        return recognizeByModel(modelSelector, query, options, reference);
    }

    /**
     * Builds a fresh recognizer with {@code options}, asks {@code modelSelector}
     * for the model to use, and parses {@code query} with it.
     */
    private static List<ModelResult> recognizeByModel(Function<DateTimeRecognizer, DateTimeModel> modelSelector, String query, DateTimeOptions options, LocalDateTime reference) {
        return modelSelector.apply(new DateTimeRecognizer(options)).parse(query, reference);
    }

    /**
     * Registers the supported models. Only English is registered here, and its
     * model is constructed with two {@code null} arguments.
     */
    @Override
    protected void initializeConfiguration() {
        // English
        registerModel(DateTimeModel.class, Culture.English, modelOptions -> new DateTimeModel(null, null));
    }
}
Loading

0 comments on commit ca137ac

Please sign in to comment.