diff --git a/build.gradle b/build.gradle index ce09404..c969263 100644 --- a/build.gradle +++ b/build.gradle @@ -11,14 +11,13 @@ buildscript { classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" classpath "org.jetbrains.kotlin:kotlin-allopen:$kotlin_version" classpath group: 'net.rubygrapefruit', name: 'native-platform', version: '0.21' + classpath 'com.codingfeline.buildkonfig:buildkonfig-gradle-plugin:0.9.0' } } -plugins { - id 'org.jetbrains.kotlin.multiplatform' version '1.3.41' -} - apply plugin: 'maven-publish' +apply plugin: 'org.jetbrains.kotlin.multiplatform' +apply plugin: 'com.codingfeline.buildkonfig' repositories { mavenCentral() @@ -115,6 +114,16 @@ task fatFramework(type: FatFrameworkTask) { ) } +buildkonfig { + packageName = 'com.example.app' + // objectName = 'YourAwesomeConfig' + // exposeObjectWithName = 'YourAwesomePublicConfig' + + defaultConfigs { + buildConfigField 'STRING', 'isDebug', 'true' + } +} + apply from: rootProject.file('pom.gradle') apply from: rootProject.file('gradle/publish.gradle') diff --git a/gradle.properties b/gradle.properties index 433610a..9d8c903 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,7 +1,7 @@ kotlin.code.style=official groupId=in.messai.kyuga kyugaVersion=0.1.0 -kotlin_version=1.4.20 +kotlin_version=1.4.32 projectGithubUrl=https://github.com/messai-engineering/Kyuga projectDescription=Kyuga diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ad43b96..bfcf99f 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.0.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-all.zip diff --git a/local.properties b/local.properties new file mode 100644 index 
0000000..a8c1e2e --- /dev/null +++ b/local.properties @@ -0,0 +1,8 @@ +## This file must *NOT* be checked into Version Control Systems, +# as it contains information specific to your local configuration. +# +# Location of the SDK. This is only used by Gradle. +# For customization when using a Version Control System, please read the +# header note. +#Fri Aug 13 23:00:43 IST 2021 +sdk.dir=/Users/tcadmin/Library/Android/sdk diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/Kyuga.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/Kyuga.kt new file mode 100644 index 0000000..361feaf --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/Kyuga.kt @@ -0,0 +1,412 @@ +package com.twelfthmile.kyuga + +import com.example.app.BuildKonfig +import com.twelfthmile.kyuga.expectations.MultDate +import com.twelfthmile.kyuga.expectations.formatDateDefault +import com.twelfthmile.kyuga.expectations.log +import com.twelfthmile.kyuga.regex.EMAIL_ADDRESS +import com.twelfthmile.kyuga.states.* +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.types.* +import com.twelfthmile.kyuga.utils.* + +fun Char.isAlpha(): Boolean = this in 'a'..'z' || this in 'A'..'Z' +private val TOKENIZE_REGEX = "[. 
]".toRegex() + +object Kyuga { + + private val isDebug = BuildKonfig.isDebug.toBoolean() + private val root: RootTrie + get() = LazyHolder.root + + private object LazyHolder { + var root = KyugaTrie().root + } + + fun tokenize(message: String): String { + val cleanMessage = message + .replace(EMAIL_ADDRESS, " EMAILADDR ") + val candidateTokens = cleanMessage + .split(TOKENIZE_REGEX) + .map { it.trim() } + return try { + val tokens = tokenize(candidateTokens).filter { it.isNotBlank() } + tokens.filterIndexed { index, it -> + if (it.isNotEmpty()) { + if (index > 0) + tokens[index - 1] != it + else + true + } else + false + } + } catch (e: Exception) { + candidateTokens + }.joinToString(" ") + } + + fun tokenize(message: List): List = message.map { + val parseResponse = parse(it) + parseResponse?.type ?: when (Util.checkForId(it)) { + true -> if (it != "EMAILADDR") "IDVAL" else it + false -> it + } + } + + /** + * Returns Pair of index upto which date was read and the date object + * + * @param str date string + * @return A last index for date string, b date object + * returns null if string is not of valid date format + */ + fun parseDate(str: String): Pair? { + val configMap = generateDefaultConfig() + return getIntegerDatePair(str, configMap) + } + + /** + * Returns Pair of index upto which date was read and the date object + * + * @param str date string + * @param config pass the message date string for defaulting + * @return A last index for date string, b date object + * returns null if string is not of valid date format + */ + + fun parseDate(str: String, config: Map): Pair? { + return getIntegerDatePair(str, config) + } + + /** + * Returns Response containing data-type, captured string and index upto which data was read + * + * @param str string to be parsed + * @param config config for parsing (Eg: date-defaulting) + * @return Yuga Response type + */ + + fun parse(str: String, config: Map): Response? 
{
+        return getResponse(str, config)
+    }
+
+    /**
+     * Parses [str] and, when the parsed context yields a date, returns it together with
+     * the index returned by [parseInternal].
+     *
+     * @return the (index, date) pair, or null when parsing fails or no date can be built
+     */
+    private fun getIntegerDatePair(str: String, configMap: Map): Pair? {
+        val (a, b) = parseInternal(str, configMap) ?: return null
+        val d = b.getDate(configMap) ?: return null
+        return Pair(a, d)
+    }
+
+    /**
+     * Runs [parseInternal] on [str] and wraps the prepared result in a [Response].
+     *
+     * @return the response, or null when either [parseInternal] or [prepareResult] yields nothing
+     * @throws IllegalArgumentException if the prepared value is neither a [MultDate] nor a [String]
+     */
+    private fun getResponse(str: String, config: Map): Response? {
+        val p = parseInternal(str, config) ?: return null
+        // prepareResult is declared nullable: treat "no result" like a failed parse instead of
+        // forcing it with `!!`, which would surface as a NullPointerException to every caller.
+        val (a, b) = prepareResult(str, p, config) ?: return null
+        return when (b) {
+            is MultDate -> Response(a, p.second.getValMap(), b, p.first)
+            is String -> Response(a, p.second.getValMap(), b, p.first)
+            else -> throw IllegalArgumentException("Error while creating response")
+        }
+    }
+
+    /**
+     * Returns Response containing data-type, captured string and index upto which data was read
+     *
+     * @param str string to be parsed
+     * @return Yuga Response type
+     */
+
+    fun parse(str: String): Response? {
+        val configMap = generateDefaultConfig()
+        return getResponse(str, configMap)
+    }
+
+    // Pair
+    private fun prepareResult(
+        str: String,
+        p: Pair,
+        config: Map
+    ): Pair? {
+        val index = p.first
+        val map = p.second
+        if (map.type == TY_DTE) {
+            if (map.contains(DT_MMM) && map.size() < 3)
+            //may fix
+                return Pair(TY_STR, str.substring(0, index))
+            if (map.contains(DT_HH) && map.contains(DT_mm) && !map.contains(DT_D) && !map.contains(
+                    DT_DD
+                ) && !map.contains(DT_MM) && !map.contains(DT_MMM) && !map.contains(DT_YY) && !map.contains(
+                    DT_YYYY
+                )
+            ) {
+                map.setType(TY_TME, null)
+                map.setVal("time", map[DT_HH] + ":" + map[DT_mm])
+                return Pair(TY_TME, str.substring(0, index))
+            }
+            val d = map.getDate(config)
+            return if (d != null)
+                p.second.type?.let { Pair(it, d) }
+            else
+                Pair(TY_STR, str.substring(0, index))
+        } else {
+            return if (map[map.type!!]
!= null) { + if (map.type == TY_ACC && config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) { + Pair(TY_AMT, map[map.type!!]!!.replace("X".toRegex(), "")) + } else { + p.second.type?.let { map[map.type!!]?.let { tg -> Pair(it, tg) } } + } + } else + p.second.type?.let { Pair(it, str.substring(0, index)) } + + } + } + + fun generateDefaultConfig(): Map { + val config = mutableMapOf() + config[YUGA_CONF_DATE] = formatDateDefault(MultDate()) + return config + } + + private fun parseInternal(inputStr: String, config: Map): Pair? { + var str = inputStr + var state = 1 + var i = 0 + var c: Char + val map = FsaContextMap() + val delimiterStack = DelimiterStack() + str = str.toLowerCase() + var counter = 0 + while (state > 0 && i < str.length) { + c = str[i] + fun getStateContext() = StateContext(root, str, c, map, i, delimiterStack, config, counter) + val fsaLogicByState: ((context: StateContext) -> StateResult?) = when (state) { + 1 -> ::state1 + 2 -> ::state2 + 3 -> ::state3 + 4 //hours to mins + -> ::state4 + 5 -> ::state5 + 6 //for seconds + -> ::state6 + 7 -> ::state7 + 8 -> ::state8 + 9 -> ::state9//handle for num case + 10 -> ::state10 + 11 -> ::state11 + 12 -> ::state12 + 13 -> ::state13 + 14 -> ::state14 + 15 -> ::state15 + 16 -> ::state16//we should handle amt case, where comma led to 16 as opposed to 12 + 17 -> ::state17//we should handle amt case, where comma led to 16,17 as opposed to 12 + 18 -> ::state18//we should handle amt case, where comma led to 16,17 as opposed to 12 + 19 //year + -> ::state19 + 20 //year++ + -> ::state20 + 21 -> ::state21 + 22 -> ::state22 + 24 -> ::state24 + 25//potential year start comes here + -> ::state25 + 26 -> ::state26 + 27 -> ::state27 + 28 -> ::state28 + 29 -> ::state29 + 30 -> ::state30 + 31 -> ::state31 + 32 -> ::state32 + 33 -> ::state33 + 34 -> ::state34 + 35 -> ::state35 + 36 -> ::state36 + 37 -> ::state37 + 38 -> ::state38 + 39//instrno + -> ::state39 + 40 -> ::state40 + 41//for phone 
numbers; same as 12 + space; coming from 27 + -> ::state41 + 42 //18=12 case, where 7-2209 was becoming amt as part of phn support + -> ::state42 + 43 //1234567890@ybl + -> ::state43 + 44 -> ::state44 + 45 -> ::state45 + else -> ::state0 + } + fsaLogicByState(getStateContext())?.let { + state = it.state + i = it.index + counter = it.counter + } ?: return null + i++ + if (isDebug) { + log("ch:" + c + " state:" + state + " map:" + map.print()) + } + } + if (map.type == null) + return null + //sentence end cases + if (state == 10) { + map.pop() + i -= 1 + } else if (state == 36) { + if (counter == 12 || Util.isNumber(str.substring(1, i))) + map.setType(TY_NUM, TY_NUM) + else + return null + } + + if (map.type == TY_AMT) { + if (!map.contains(map.type!!) || map[map.type!!]!!.contains(".") && map[map.type!!]!!.split("\\.".toRegex()) + .dropLastWhile { it.isEmpty() }.toTypedArray()[0].length > 8 || !map[map.type!!]!!.contains( + "." + ) && map[map.type!!]!!.length > 8 + ) + map.setType(TY_NUM, TY_NUM) + } + + if (map.type == TY_NUM) { + if (i < str.length && str[i].isAlpha() && !config.containsKey(YUGA_SOURCE_CONTEXT)) { + var j = i + while (j < str.length && str[j] != ' ') + j++ + map.setType(TY_STR, TY_STR) + i = j + } else if (map[TY_NUM] != null) { + if (map[TY_NUM]!!.length == 10 && (map[TY_NUM]!![0] == '9' || map[TY_NUM]!![0] == '8' || map[TY_NUM]!![0] == '7')) + map.setVal("num_class", TY_PHN) + else if (map[TY_NUM]!!.length == 12 && map[TY_NUM]!!.startsWith("91")) + map.setVal("num_class", TY_PHN) + else if (map[TY_NUM]!!.length == 11 && map[TY_NUM]!!.startsWith("18")) + map.setVal("num_class", TY_PHN) + else if (map[TY_NUM]!!.length == 11 && map[TY_NUM]!![0] == '0') + map.setVal("num_class", TY_PHN) + else + map.setVal("num_class", TY_NUM) + } + } else if (map.type == TY_DTE && i + 1 < str.length) { + val `in` = i + skip(str.substring(i)) + val sub = str.substring(`in`) + if (`in` < str.length) { + val pFSATimePrex = Util.checkTypes(root, "FSA_TIMEPRFX", sub) + 
val pFSATz = Util.checkTypes(root, "FSA_TZ", sub) + if (Util.isNumber(str[`in`]) || Util.checkTypes( + root, + "FSA_MONTHS", + sub + ) != null || Util.checkTypes(root, "FSA_DAYS", sub) != null + ) { + val kl = parseInternal(sub, config) + if (kl != null && kl.second.type == TY_DTE) { + map.putAll(kl.second) + i = `in` + kl.first + } + } else if (pFSATimePrex != null) { + val iTime = `in` + pFSATimePrex.first + 1 + skip(str.substring(`in` + pFSATimePrex.first + 1)) + if (iTime < str.length && (Util.isNumber(str[iTime]) || Util.checkTypes( + root, + "FSA_DAYS", + str.substring(iTime) + ) != null) + ) { + val p_ = parseInternal(str.substring(iTime), config) + if (p_ != null && p_.second.type == TY_DTE) { + map.putAll(p_.second) + i = iTime + p_.first + } + } + } else if (pFSATz != null) { + val j = skipForTZ(str.substring(`in` + pFSATz.first + 1), map) + i = `in` + pFSATz.first + 1 + j + } else if (sub.toLowerCase().startsWith("pm") || sub.toLowerCase().startsWith("am")) { + //todo handle appropriately for pm + i = `in` + 2 + } + } + } else if (map.type == TY_TMS) { + val v = map[map.type!!] 
+            if (v != null && v.length == 8 && Util.isHour(v[0], v[1]) && Util.isHour(v[4], v[5])) {
+                v.substring(0, 4).extractTime(map.getValMap(), "dept")
+                v.substring(4, 8).extractTime(map.getValMap(), "arrv")
+            }
+        }
+        return Pair(i, map)
+    }
+
+    /**
+     * Scans the text that follows a matched time-zone token (see the FSA_TZ branch of
+     * [parseInternal]) with a small state machine and reports how many characters to skip.
+     *
+     * State 1 accepts CH_SPACE, CH_PLUS and digits; CH_COLN moves on to states 2-3, which
+     * require one digit each; state 4 expects CH_SPACE; state 5 looks for a 4-digit year.
+     * A 4-digit year found along the way is stored in [map] under DT_YYYY.
+     *
+     * @param str text immediately after the time-zone token
+     * @param map parse context that receives DT_YYYY when a year is recognised
+     * @return 0 when the scan ends in state -1 (nothing usable), otherwise the index reached
+     */
+    private fun skipForTZ(str: String, map: FsaContextMap): Int {
+        var state = 1
+        var i = 0
+        while (state > 0 && i < str.length) {
+            val c = str[i]
+            when (state) {
+                1 -> when {
+                    c.toInt() == CH_SPACE || c.toInt() == CH_PLUS || Util.isNumber(c) -> state = 1
+                    c.toInt() == CH_COLN -> state = 2
+                    else -> {
+                        val s_ = str.substring(0, i).trim { it <= ' ' }
+                        state = if (s_.length == 4 && Util.isNumber(s_)) {
+                            //we captured a year after IST Mon Sep 04 13:47:13 IST 2017
+                            map.put(DT_YYYY, s_)
+                            -2
+                        } else
+                            -1
+                    }
+                }
+                2 ->
+                    //todo re-adjust GMT time, current default +5:30 for IST
+                    state = if (Util.isNumber(c)) 3 else -1
+                3 -> state = if (Util.isNumber(c)) 4 else -1
+                4 -> state = if (c.toInt() == CH_SPACE) 5 else -2
+                5 -> {
+                    // Check the bounds BEFORE slicing: substring(i, i + 4) throws when fewer
+                    // than four characters remain (e.g. ":30 17").
+                    if (i + 3 < str.length) {
+                        val sy = str.substring(i, i + 4)
+                        if (Util.isNumber(sy)) {
+                            map.put(DT_YYYY, sy)
+                            i += 3
+                        }
+                    }
+                    state = -2
+                }
+            }
+            i++
+        }
+        val s_ = str.substring(0, i).trim { it <= ' ' }
+        if (state == 1 && s_.length == 4 && Util.isNumber(s_))
+        //we captured a year after IST Mon Sep 04 13:47:13 IST 2017
+            map.put(DT_YYYY, s_)
+        return if (state == -1) 0 else i
+    }
+
+    /**
+     * Returns the length of the leading run of ' ', ',', '(' and ':' characters in [str].
+     */
+    private fun skip(str: String): Int {
+        var i = 0
+        while (i < str.length) {
+            if (str[i] == ' ' || str[i] == ',' || str[i] == '(' || str[i] == ':')
+                i++
+            else
+                break
+        }
+        return i
+    }
+
+    /**
+     * Records the delimiter characters seen while parsing.
+     *
+     * Note: despite its name, [pop] does NOT remove anything; it only reads the most
+     * recently pushed character, so it can be called repeatedly with the same result.
+     */
+    internal class DelimiterStack {
+        private val stack: MutableList<Char> = mutableListOf()
+
+        /** Appends [ch] as the most recent delimiter. */
+        fun push(ch: Char) {
+            stack.add(ch)
+        }
+
+        /** Returns the most recently pushed delimiter without removing it, or '~' when empty. */
+        fun pop(): Char {
+            return stack.lastOrNull() ?: '~'
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/kYuga.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/kYuga.kt
deleted file mode 100644
index
83cee95..0000000 --- a/src/commonMain/kotlin/com/twelfthmile/kyuga/kYuga.kt +++ /dev/null @@ -1,1198 +0,0 @@ -package com.twelfthmile.kyuga - -import com.twelfthmile.kyuga.expectations.MultDate -import com.twelfthmile.kyuga.expectations.formatDateDefault -import com.twelfthmile.kyuga.expectations.log -import com.twelfthmile.kyuga.regex.EMAIL_ADDRESS -import com.twelfthmile.kyuga.regex.PHONE -import com.twelfthmile.kyuga.regex.WEB_URL -import com.twelfthmile.kyuga.types.GenTrie -import com.twelfthmile.kyuga.types.Pair -import com.twelfthmile.kyuga.types.Response -import com.twelfthmile.kyuga.types.RootTrie -import com.twelfthmile.kyuga.utils.* - -fun Char.isAlpha(): Boolean = this in 'a'..'z' || this in 'A'..'Z' -private val TOKENIZE_REGEX = "[. ]".toRegex() - -object Kyuga { - - private val D_DEBUG = false - - private val root: RootTrie - get() = LazyHolder.root - - private object LazyHolder { - internal var root = createRoot() - } - - private fun createRoot(): RootTrie { - val root = RootTrie() - root.next["FSA_MONTHS"] = GenTrie() - root.next["FSA_DAYS"] = GenTrie() - root.next["FSA_TIMEPRFX"] = GenTrie() - root.next["FSA_AMT"] = GenTrie() - root.next["FSA_TIMES"] = GenTrie() - root.next["FSA_TZ"] = GenTrie() - root.next["FSA_DAYSFFX"] = GenTrie() - root.next["FSA_UPI"] = GenTrie() - seeding(FSA_MONTHS, root.next["FSA_MONTHS"]) - seeding(FSA_DAYS, root.next["FSA_DAYS"]) - seeding(FSA_TIMEPRFX, root.next["FSA_TIMEPRFX"]) - seeding(FSA_AMT, root.next["FSA_AMT"]) - seeding(FSA_TIMES, root.next["FSA_TIMES"]) - seeding(FSA_TZ, root.next["FSA_TZ"]) - seeding(FSA_DAYSFFX, root.next["FSA_DAYSFFX"]) - seeding(FSA_UPI, root.next["FSA_UPI"]) - return root - } - - private fun seeding(type: String, root: GenTrie?) { - var t: GenTrie? 
- var c = 0 - for (fsaCldr in type.split(",").toTypedArray()) { - c++ - t = root - val len = fsaCldr.length - var i = 0 - while (i < len) { - val ch = fsaCldr[i] - t!!.child = true - if (!t.next.containsKey(ch)) t.next[ch] = GenTrie() - t = t.next[ch] - if (i == len - 1) { - t!!.leaf = true - t.token = fsaCldr.replace(";", "") - } else if (i < len - 1 && fsaCldr[i + 1].toInt() == 59) { //semicolon - t!!.leaf = true - t.token = fsaCldr.replace(";", "") - i++ //to skip semicolon - } - i++ - } - } - } - - fun tokenise(message: List): List = message.map { - val parseResponse = parse(it) - parseResponse?.type ?: when (Util.checkForId(it)) { - true -> if (it != "EMAILADDR") "IDVAL" else it - false -> it - } - } - - fun tokenize(message: String): String { - val cleanMessage = message - .replace(EMAIL_ADDRESS, " EMAILADDR ") - val candidateTokens = cleanMessage - .split(TOKENIZE_REGEX) - .map { it.trim() } - return try { - val tokens = tokenise(candidateTokens).filter { it.isNotBlank() } - tokens.filterIndexed { index, it -> - if (it.isNotEmpty()) { - if (index > 0) - tokens[index - 1] != it - else - true - } else - false - } - } catch (e: Exception) { - candidateTokens - }.joinToString(" ") - } - - /** - * Returns Pair of index upto which date was read and the date object - * - * @param str date string - * @return A last index for date string, b date object - * returns null if string is not of valid date format - */ - fun parseDate(str: String): Pair? { - val configMap = generateDefaultConfig() - return getIntegerDatePair(str, configMap) - } - - private fun getIntegerDatePair(str: String, configMap: Map): Pair? 
{ - val (a, b) = parseInternal(str, configMap) ?: return null - val d = b.getDate(configMap) ?: return null - return Pair(a, d) - } - - /** - * Returns Pair of index upto which date was read and the date object - * - * @param str date string - * @param config pass the message date string for defaulting - * @return A last index for date string, b date object - * returns null if string is not of valid date format - */ - - fun parseDate(str: String, config: Map): Pair? { - return getIntegerDatePair(str, config) - } - - /** - * Returns Response containing data-type, captured string and index upto which data was read - * - * @param str string to be parsed - * @param config config for parsing (Eg: date-defaulting) - * @return Yuga Response type - */ - - fun parse(str: String, config: Map): Response? { - return getResponse(str, config) - } - - private fun getResponse(str: String, config: Map): Response? { - val p = parseInternal(str, config) ?: return null - val (a, b) = prepareResult(str, p, config)!! - return when (b) { - is MultDate -> Response(a, p.b.getValMap(), b, p.a) - is String -> Response(a, p.b.getValMap(), b, p.a) - else -> throw IllegalArgumentException("Error while creating response") - } - } - - /** - * Returns Response containing data-type, captured string and index upto which data was read - * - * @param str string to be parsed - * @return Yuga Response type - */ - - fun parse(str: String): Response? { - val configMap = generateDefaultConfig() - return getResponse(str, configMap) - } - - // Pair - private fun prepareResult( - str: String, - p: Pair, - config: Map - ): Pair? 
{ - val index = p.a - val map = p.b - if (map.type == TY_DTE) { - if (map.contains(DT_MMM) && map.size() < 3) - //may fix - return Pair(TY_STR, str.substring(0, index)) - if (map.contains(DT_HH) && map.contains(DT_mm) && !map.contains(DT_D) && !map.contains( - DT_DD - ) && !map.contains(DT_MM) && !map.contains(DT_MMM) && !map.contains(DT_YY) && !map.contains( - DT_YYYY - ) - ) { - map.setType(TY_TME, null) - map.setVal("time", map[DT_HH] + ":" + map[DT_mm]) - return Pair(TY_TME, str.substring(0, index)) - } - val d = map.getDate(config) - return if (d != null) - p.b.type?.let { Pair(it, d) } - else - Pair(TY_STR, str.substring(0, index)) - } else { - return if (map[map.type!!] != null) { - if (map.type == TY_ACC && config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) { - Pair(TY_AMT, map[map.type!!]!!.replace("X".toRegex(), "")) - } else { - p.b.type?.let { map[map.type!!]?.let { tg -> Pair(it, tg) } } - } - } else - p.b.type?.let { Pair(it, str.substring(0, index)) } - - } - } - - private fun generateDefaultConfig(): Map { - val config = mutableMapOf() - config[YUGA_CONF_DATE] = formatDateDefault(MultDate()) - return config - } - - fun checkTypes(type: String, word: String): Pair? { - return Util.checkTypes(root, type, word) - } - - - private fun parseInternal(inputStr: String, config: Map): Pair? 
{ - var str = inputStr - var state = 1 - var i = 0 - var c: Char - val map = FsaContextMap() - val delimiterStack = DelimiterStack() - str = str.toLowerCase() - var counter = 0 - while (state > 0 && i < str.length) { - c = str[i] - when (state) { - 1 -> if (Util.isNumber(c)) { - map.setType(TY_NUM, null) - map.put(TY_NUM, c) - state = 2 - } else if (Util.checkTypes(root, "FSA_MONTHS", str.substring(i))?.let { - map.setType(TY_DTE, null) - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 33 - } else if (Util.checkTypes(root, "FSA_DAYS", str.substring(i))?.let { - map.setType(TY_DTE, null) - map.put(DT_DD, it.b) - i += it.a - true - } == true) { - state = 30 - } else if (c.toInt() == CH_HYPH) {//it could be a negative number - state = 37 - } else if (c.toInt() == CH_LSBT) {//it could be an OTP - state = 1 - } else { - state = accAmtNumPct(str, i, map, config) - if (map.type == null) - return null - if (state == -1 && map.type != TY_PCT) { - i -= 1 - } - } - - 2 -> if (Util.isNumber(c)) { - map.append(c) - state = 3 - } else if (Util.isTimeOperator(c)) { - delimiterStack.push(c) - map.setType(TY_DTE, DT_HH) - state = 4 - } else if (Util.isDateOperator(c) || c.toInt() == CH_COMA) { - delimiterStack.push(c) - map.setType(TY_DTE, DT_D) - state = 16 - } else if (checkMonthType(str, i)?.let { - map.setType(TY_DTE, DT_D) - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 24 - } else { - state = accAmtNumPct(str, i, map, config) - if (state == -1 && map.type != TY_PCT) - i -= 1 - } - 3 -> if (Util.isNumber(c)) { - map.append(c) - state = 8 - } else if (Util.isTimeOperator(c)) { - delimiterStack.push(c) - map.setType(TY_DTE, DT_HH) - state = 4 - } else if (Util.isDateOperator(c) || c.toInt() == CH_COMA) { - delimiterStack.push(c) - map.setType(TY_DTE, DT_D) - state = 16 - } else if (checkMonthType(str, i)?.let { - map.setType(TY_DTE, DT_D) - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 24 - } else if 
(Util.checkTypes(root, "FSA_DAYSFFX", str.substring(i))?.let { - map.setType(TY_DTE, DT_D) - i += it.a - true - } == true) { - state = 32 - } else { - state = accAmtNumPct(str, i, map, config) - if (state == -1 && map.type != TY_PCT) - i -= 1 - } - 4 //hours to mins - -> if (Util.isNumber(c)) { - map.upgrade(c)//hh to mm - state = 5 - } else { //saw a colon randomly, switch back to num from hours - if (!map.contains(DT_MMM)) - map.setType(TY_NUM, TY_NUM) - i -= 2 //move back so that colon is omitted - state = -1 - } - 5 -> if (Util.isNumber(c)) { - map.append(c) - state = 5 - } else if (c.toInt() == CH_COLN) - state = 6 - else if (c == 'a' && i + 1 < str.length && str[i + 1] == 'm') { - i += 1 - state = -1 - } else if (c == 'p' && i + 1 < str.length && str[i + 1] == 'm') { - map.put(DT_HH, (map[DT_HH]!!.toInt() + 12).toString()) - i += 1 - state = -1 - } else if (Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let { - i += it.a - true - } == true) { - state = -1 - } else - state = 7 - 6 //for seconds - -> if (Util.isNumber(c)) { - map.upgrade(c) - if (i + 1 < str.length && Util.isNumber(str[i + 1])) - map.append(str[i + 1]) - i = i + 1 - state = -1 - } else - state = -1 - 7 -> { - if (c == 'a' && i + 1 < str.length && str[i + 1] == 'm') { - i = i + 1 - val hh = map[DT_HH]!!.toInt() - if (hh == 12) - map.put(DT_HH, 0.toString()) - } else if (c == 'p' && i + 1 < str.length && str[i + 1] == 'm') { - val hh = map[DT_HH]!!.toInt() - if (hh != 12) - map.put(DT_HH, (hh + 12).toString()) - i = i + 1 - } else if (Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let { - i += it.a - true - } == true) { - // emptiness - } else - i -= 2 - state = -1 - } - 8 -> if (Util.isNumber(c)) { - map.append(c) - state = 9 - } else { - state = accAmtNumPct(str, i, map, config) - if (c.toInt() == CH_SPACE && state == -1 && i + 1 < str.length && Util.isNumber(str[i + 1])) - state = 12 - else if (c.toInt() == CH_HYPH && state == -1 && i + 1 < str.length && Util.isNumber(str[i + 
1])) - state = 45 - else if (state == -1 && map.type != TY_PCT) - i = i - 1 - } - 9 -> if (Util.isDateOperator(c)) { - delimiterStack.push(c) - state = 25 - } else if (Util.isNumber(c)) { - map.append(c) - counter = 5 - state = 15 - } else { - state = accAmtNumPct(str, i, map, config) - if (state == -1 && map.type != TY_PCT) {//NUM - i = i - 1 - } - }//handle for num case - 10 -> if (Util.isNumber(c)) { - map.append(c) - map.setType(TY_AMT, TY_AMT) - state = 14 - } else { //saw a fullstop randomly - map.pop()//remove the dot which was appended - i = i - 2 - state = -1 - } - 11 -> if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) - //*Xx - map.append('X') - else if (c.toInt() == CH_HYPH) - state = 11 - else if (Util.isNumber(c)) { - map.append(c) - state = 13 - } else if (c == ' ' && i + 1 < str.length && (str[i + 1].toInt() == 42 || str[i + 1].toInt() == 88 || str[i + 1].toInt() == 120 || Util.isNumber( - str[i + 1] - )) - ) - state = 11 - else if (c.toInt() == CH_FSTP && lookAheadForInstr(str, i).let { - if (it > 0) { - i = it - true - } else { - false - } - }) { - // emptiness - } else { - i -= 1 - state = -1 - } - 12 -> if (Util.isNumber(c)) { - map.setType(TY_AMT, TY_AMT) - map.append(c) - } else if (c.toInt() == CH_COMA) - //comma - state = 12 - else if (c.toInt() == CH_FSTP) { //dot - map.append(c) - state = 10 - } else if (c.toInt() == CH_HYPH && i + 1 < str.length && Util.isNumber(str[i + 1])) { - state = 39 - } else { - if (i - 1 > 0 && str[i - 1].toInt() == CH_COMA) - i = i - 2 - else - i = i - 1 - state = -1 - } - 13 -> if (Util.isNumber(c)) - map.append(c) - else if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) - //*Xx - map.append('X') - else if (c.toInt() == CH_FSTP && config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) { //LIC **150.00 fix - map.setType(TY_AMT, TY_AMT) - map.put(TY_AMT, map[TY_AMT]!!.replace("X".toRegex(), "")) - map.append(c) - state = 10 - } else if (c.toInt() == CH_FSTP && 
lookAheadForInstr(str, i).let { - if (it > 0) { - i = it - true - } else { - false - } - }) { - // emptiness - } else { - i = i - 1 - state = -1 - } - 14 -> if (Util.isNumber(c)) { - map.append(c) - } else if (c.toInt() == CH_PCT) { - map.setType(TY_PCT, TY_PCT) - state = -1 - } else if ((c == 'k' || c == 'c') && i + 1 < str.length && str[i + 1] == 'm') { - map.setType(TY_DST, TY_DST) - i += 1 - state = -1 - } else if ((c == 'k' || c == 'm') && i + 1 < str.length && str[i + 1] == 'g') { - map.setType(TY_WGT, TY_WGT) - i += 1 - state = -1 - } else { - var tempBrk = true - if (c.toInt() == CH_FSTP && i + 1 < str.length && Util.isNumber(str[i + 1])) { - val samt = map[map.type!!] - if (samt!!.contains(".")) { - val samtarr = samt.split("\\.".toRegex()) - if (samtarr.size == 2) { - map.type = TY_DTE - map.put(DT_D, samtarr[0]) - map.put(DT_MM, samtarr[1]) - state = 19 - tempBrk = false - } - } - } - if (tempBrk) { - i -= 1 - state = -1 - } - } - 15 -> if (Util.isNumber(c)) { - counter++ - map.append(c) - } else if (c.toInt() == CH_COMA) - //comma - state = 12 - else if (c.toInt() == CH_FSTP) { //dot - map.append(c) - state = 10 - } else if ((c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) && i + 1 < str.length && (Util.isNumber( - str[i + 1] - ) || str[i + 1].toInt() == CH_HYPH || str[i + 1].toInt() == 42 || str[i + 1].toInt() == 88 || str[i + 1].toInt() == 120) - ) {//*Xx - map.setType(TY_ACC, TY_ACC) - map.append('X') - state = 11 - } else if (c.toInt() == CH_SPACE && i + 2 < str.length && Util.isNumber(str[i + 1]) && Util.isNumber( - str[i + 2] - ) - ) { - state = 41 - } else { - i = i - 1 - state = -1 - }// else if (c == Constants.CH_ATRT) { - // delimiterStack.push(c); - // state = 43; - // } - 16 -> if (Util.isNumber(c)) { - map.upgrade(c) - state = 17 - } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA) - state = 16 - else if (checkMonthType(str, i)?.let { - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 24 - } else if 
(c.toInt() == CH_FSTP) { //dot - map.setType(TY_NUM, TY_NUM) - map.append(c) - state = 10 - } else if (i > 0 && Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let { - map.setType(TY_TME, null) - var s = str.substring(0, i) - if (it.b == "mins" || it.b == "minutes") - s = "00$s" - extractTime(s, map.getValMap()) - i += it.a - true - } == true) { - state = -1 - } else {//this is just a number, not a date - //to cater to 16 -Nov -17 - if (delimiterStack.pop() - .toInt() == CH_SPACE && c.toInt() == CH_HYPH && i + 1 < str.length && (Util.isNumber( - str[i + 1] - ) || checkMonthType(str, i + 1) != null) - ) { - state = 16 - } else { - map.setType(TY_NUM, TY_NUM) - var j = i - while (!Util.isNumber(str[j])) - j-- - i = j - state = -1 - } - }//we should handle amt case, where comma led to 16 as opposed to 12 - 17 -> if (Util.isNumber(c)) { - map.append(c) - state = 18 - } else if (Util.isDateOperator(c)) { - delimiterStack.push(c) - state = 19 - } else if (c.toInt() == CH_COMA && delimiterStack.pop().toInt() == CH_COMA) { //comma - map.setType(TY_NUM, TY_NUM) - state = 12 - } else if (c.toInt() == CH_FSTP && delimiterStack.pop().toInt() == CH_COMA) { //dot - map.setType(TY_NUM, TY_NUM) - map.append(c) - state = 10 - } else { - map.setType(TY_STR, TY_STR) - i = i - 1 - state = -1 - }//we should handle amt case, where comma led to 16,17 as opposed to 12 - 18 -> if (Util.isDateOperator(c)) { - delimiterStack.push(c) - state = 19 - } else if (Util.isNumber(c) && delimiterStack.pop().toInt() == CH_COMA) { - map.setType(TY_NUM, TY_NUM) - state = 12 - map.append(c) - } else if (Util.isNumber(c) && delimiterStack.pop().toInt() == CH_HYPH) { - map.setType(TY_NUM, TY_NUM) - state = 42 - map.append(c) - } else if (c.toInt() == CH_COMA && delimiterStack.pop().toInt() == CH_COMA) { //comma - map.setType(TY_NUM, TY_NUM) - state = 12 - } else if (c.toInt() == CH_FSTP && delimiterStack.pop().toInt() == CH_COMA) { //dot - map.setType(TY_NUM, TY_NUM) - map.append(c) - state = 10 - } 
else if (c.toInt() == CH_FSTP && map.contains(DT_D) && map.contains(DT_MM)) { //dot - state = -1 - } else { - map.setType(TY_STR, TY_STR) - i = i - 1 - state = -1 - }//we should handle amt case, where comma led to 16,17 as opposed to 12 - 19 //year - -> if (Util.isNumber(c)) { - map.upgrade(c) - state = 20 - } else { - i = i - 2 - state = -1 - } - 20 //year++ - -> if (Util.isNumber(c)) { - map.append(c) - state = 21 - } else if (c == ':') { - if (map.contains(DT_YY)) - map.convert(DT_YY, DT_HH) - else if (map.contains(DT_YYYY)) - map.convert(DT_YYYY, DT_HH) - state = 4 - } else { - map.remove(DT_YY)//since there is no one number year - i = i - 1 - state = -1 - } - 21 -> if (Util.isNumber(c)) { - map.upgrade(c) - state = 22 - } else if (c == ':') { - if (map.contains(DT_YY)) - map.convert(DT_YY, DT_HH) - else if (map.contains(DT_YYYY)) - map.convert(DT_YYYY, DT_HH) - state = 4 - } else { - i = i - 1 - state = -1 - } - 22 -> if (Util.isNumber(c)) { - map.append(c) - state = -1 - } else { - map.remove(DT_YYYY)//since there is no three number year - i = i - 1 - state = -1 - } - 24 -> if (Util.isDateOperator(c) || c.toInt() == CH_COMA) { - delimiterStack.push(c) - state = 24 - } else if (Util.isNumber(c)) { - map.upgrade(c) - state = 20 - } else if (c.toInt() == CH_SQOT && i + 1 < str.length && Util.isNumber(str[i + 1])) { - state = 24 - } else if (c == '|') { - state = 24 - } else { - i = i - 1 - state = -1 - } - 25//potential year start comes here - -> if (Util.isNumber(c)) { - map.setType(TY_DTE, DT_YYYY) - map.put(DT_MM, c) - state = 26 - } else if (i > 0 && Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let { - map.setType(TY_TME, null) - var s = str.substring(0, i) - if (it.b == "mins") - s = "00$s" - extractTime(s, map.getValMap()) - i += it.a - true - } == true) { - state = -1 - } else { - //it wasn't year, it was just a number - i = i - 2 - state = -1 - } - 26 -> if (Util.isNumber(c)) { - map.append(c) - state = 27 - } else { - map.setType(TY_STR, 
TY_STR) - i = i - 1 - state = -1 - } - 27 -> if (Util.isDateOperator(c)) { - delimiterStack.push(c) - state = 28 - } else if (Util.isNumber(c)) {//it was a number, most probably telephone number - if (map.type == TY_DTE) { - map.setType(TY_NUM, TY_NUM) - } - map.append(c) - if ((delimiterStack.pop().toInt() == CH_SLSH || delimiterStack.pop() - .toInt() == CH_HYPH) && i + 1 < str.length && Util.isNumber( - str[i + 1] - ) && (i + 2 == str.length || Util.isDelimiter(str[i + 2])) - ) {//flight time 0820/0950 - map.setType(TY_TMS, TY_TMS) - map.append(str[i + 1]) - i = i + 1 - state = -1 - } else if (delimiterStack.pop().toInt() == CH_SPACE) { - state = 41 - } else - state = 12 - } else if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) {//*Xx - map.setType(TY_ACC, TY_ACC) - map.append('X') - state = 11 - } else { - map.setType(TY_STR, TY_STR) - i = i - 1 - state = -1 - } - 28 -> if (Util.isNumber(c)) { - map.put(DT_D, c) - state = 29 - } else { - map.setType(TY_STR, TY_STR) - i = i - 2 - state = -1 - } - 29 -> { - if (Util.isNumber(c)) { - map.append(c) - } else - i = i - 1 - state = -1 - } - 30 -> if (c.toInt() == CH_COMA || c.toInt() == CH_SPACE) - state = 30 - else if (Util.isNumber(c)) { - map.put(DT_D, c) - state = 31 - } else { - map.type = TY_DTE - i = i - 1 - state = -1 - } - 31 -> if (Util.isNumber(c)) { - map.append(c) - state = 32 - } else if (checkMonthType(str, i)?.let { - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 24 - } else if (c.toInt() == CH_COMA || c.toInt() == CH_SPACE) - state = 32 - else { - i = i - 1 - state = -1 - } - 32 -> if (checkMonthType(str, i)?.let { - map.put(DT_MMM, it.b) - i += it.a - true - } == true) { - state = 24 - } else if (c.toInt() == CH_COMA || c.toInt() == CH_SPACE) - state = 32 - else if (Util.checkTypes(root, "FSA_DAYSFFX", str.substring(i))?.let { - i += it.a - true - } == true) { - state = 32 - } else { - var j = i - while (!Util.isNumber(str[j])) - j-- - i = j - state = -1 - } - 33 -> if 
(Util.isNumber(c)) { - map.put(DT_D, c) - state = 34 - } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA || c.toInt() == CH_HYPH) - state = 33 - else { - map.type = TY_DTE - i -= 1 - state = -1 - } - 34 -> if (Util.isNumber(c)) { - map.append(c) - state = 35 - } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA) - state = 35 - else { - map.type = TY_DTE - i -= 1 - state = -1 - } - 35 -> if (Util.isNumber(c)) { - if (i > 1 && Util.isNumber(str[i - 1])) { - map.convert(DT_D, DT_YYYY) - map.append(c) - } else - map.put(DT_YY, c) - state = 20 - } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA) - state = 40 - else { - map.type = TY_DTE - i -= 1 - state = -1 - } - 36 -> if (Util.isNumber(c)) { - map.append(c) - counter++ - } else if (c.toInt() == CH_FSTP && i + 1 < str.length && Util.isNumber(str[i + 1])) { - map.append(c) - state = 10 - } else if (c.toInt() == CH_HYPH && i + 1 < str.length && Util.isNumber(str[i + 1])) { - delimiterStack.push(c) - map.append(c) - state = 16 - } else { - if (counter == 12 || Util.isNumber(str.substring(1, i))) - map.setType(TY_NUM, TY_NUM) - else - return null - state = -1 - } - 37 -> if (Util.isNumber(c)) { - map.setType(TY_AMT, TY_AMT) - map.put(TY_AMT, '-') - map.append(c) - state = 12 - } else if (c.toInt() == CH_FSTP) { - map.put(TY_AMT, '-') - map.append(c) - state = 10 - } else - state = -1 - 38 -> { - i = map.index!! 
- state = -1 - } - 39//instrno - -> if (Util.isNumber(c)) - map.append(c) - else { - map.setType(TY_ACC, TY_ACC) - state = -1 - } - 40 -> if (Util.isNumber(c)) { - map.put(DT_YY, c) - state = 20 - } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA) - state = 40 - else { - map.type = TY_DTE - i -= 1 - state = -1 - } - 41//for phone numbers; same as 12 + space; coming from 27 - -> when { - Util.isNumber(c) -> { - map.append(c) - } - c.toInt() == CH_SPACE -> state = 41 - else -> { - i = if (i - 1 > 0 && str[i - 1].toInt() == CH_SPACE) - i - 2 - else - i - 1 - state = -1 - } - } - 42 //18=12 case, where 7-2209 was becoming amt as part of phn support - -> if (Util.isNumber(c)) { - map.append(c) - } else if (c.toInt() == CH_HYPH && i + 1 < str.length && Util.isNumber(str[i + 1])) { - state = 39 - } else { - i -= 1 - state = -1 - } - 43 //1234567890@ybl - -> if (Util.isLowerAlpha(c) || Util.isNumber(c)) { - map.setType(TY_VPD, TY_VPD) - map.append(delimiterStack.pop()) - map.append(c) - state = 44 - } else { - state = -1 - } - 44 -> if (Util.isLowerAlpha(c) || Util.isNumber(c) || c.toInt() == CH_FSTP) { - map.append(c) - state = 44 - } else - state = -1 - 45 -> if (Util.isNumber(c)) { - map.append(c) - } else if (c.toInt() == CH_HYPH && i + 1 < str.length && Util.isNumber(str[i + 1])) { - state = 39 - } else { - i -= if (i - 1 > 0 && str[i - 1].toInt() == CH_COMA) - 2 - else - 1 - state = -1 - } - } - i++ - if (D_DEBUG) { - log("ch:" + c + " state:" + state + " map:" + map.print()) - } - } - if (map.type == null) - return null - //sentence end cases - if (state == 10) { - map.pop() - i -= 1 - } else if (state == 36) { - if (counter == 12 || Util.isNumber(str.substring(1, i))) - map.setType(TY_NUM, TY_NUM) - else - return null - } - - if (map.type == TY_AMT) { - if (!map.contains(map.type!!) 
|| map[map.type!!]!!.contains(".") && map[map.type!!]!!.split("\\.".toRegex()) - .dropLastWhile { it.isEmpty() }.toTypedArray()[0].length > 8 || !map[map.type!!]!!.contains( - "." - ) && map[map.type!!]!!.length > 8 - ) - map.setType(TY_NUM, TY_NUM) - } - - if (map.type == TY_NUM) { - if (i < str.length && str[i].isAlpha() && !config.containsKey(YUGA_SOURCE_CONTEXT)) { - var j = i - while (j < str.length && str[j] != ' ') - j++ - map.setType(TY_STR, TY_STR) - i = j - } else if (map[TY_NUM] != null) { - if (map[TY_NUM]!!.length == 10 && (map[TY_NUM]!![0] == '9' || map[TY_NUM]!![0] == '8' || map[TY_NUM]!![0] == '7')) - map.setVal("num_class", TY_PHN) - else if (map[TY_NUM]!!.length == 12 && map[TY_NUM]!!.startsWith("91")) - map.setVal("num_class", TY_PHN) - else if (map[TY_NUM]!!.length == 11 && map[TY_NUM]!!.startsWith("18")) - map.setVal("num_class", TY_PHN) - else if (map[TY_NUM]!!.length == 11 && map[TY_NUM]!![0] == '0') - map.setVal("num_class", TY_PHN) - else - map.setVal("num_class", TY_NUM) - } - } else if (map.type == TY_DTE && i + 1 < str.length) { - val `in` = i + skip(str.substring(i)) - val sub = str.substring(`in`) - if (`in` < str.length) { - val pFSATimePrex = Util.checkTypes(root, "FSA_TIMEPRFX", sub) - val pFSATz = Util.checkTypes(root, "FSA_TZ", sub) - if (Util.isNumber(str[`in`]) || Util.checkTypes( - root, - "FSA_MONTHS", - sub - ) != null || Util.checkTypes(root, "FSA_DAYS", sub) != null - ) { - val kl = parseInternal(sub, config) - if (kl != null && kl.b.type == TY_DTE) { - map.putAll(kl.b) - i = `in` + kl.a - } - } else if (pFSATimePrex != null) { - val iTime = `in` + pFSATimePrex.a + 1 + skip(str.substring(`in` + pFSATimePrex.a + 1)) - if (iTime < str.length && (Util.isNumber(str[iTime]) || Util.checkTypes( - root, - "FSA_DAYS", - str.substring(iTime) - ) != null) - ) { - val p_ = parseInternal(str.substring(iTime), config) - if (p_ != null && p_.b.type == TY_DTE) { - map.putAll(p_.b) - i = iTime + p_.a - } - } - } else if (pFSATz != null) { 
- val j = skipForTZ(str.substring(`in` + pFSATz.a + 1), map) - i = `in` + pFSATz.a + 1 + j - } else if (sub.toLowerCase().startsWith("pm") || sub.toLowerCase().startsWith("am")) { - //todo handle appropriately for pm - i = `in` + 2 - } - } - } else if (map.type == TY_TMS) { - val v = map[map.type!!] - if (v != null && v.length == 8 && Util.isHour(v[0], v[1]) && Util.isHour(v[4], v[5])) { - extractTime(v.substring(0, 4), map.getValMap(), "dept") - extractTime(v.substring(4, 8), map.getValMap(), "arrv") - } - } - return Pair(i, map) - } - - private fun checkMonthType( - str: String, - i: Int - ) = Util.checkTypes(root, "FSA_MONTHS", str.substring(i)) - - private fun skipForTZ(str: String, map: FsaContextMap): Int { - var state = 1 - var i = 0 - var c: Char - while (state > 0 && i < str.length) { - c = str[i] - when (state) { - 1 -> if (c.toInt() == CH_SPACE || c.toInt() == CH_PLUS || Util.isNumber(c)) - state = 1 - else if (c.toInt() == CH_COLN) - state = 2 - else { - val s_ = str.substring(0, i).trim { it <= ' ' } - if (s_.length == 4 && Util.isNumber(s_)) {//we captured a year after IST Mon Sep 04 13:47:13 IST 2017 - map.put(DT_YYYY, s_) - state = -2 - } else - state = -1 - } - 2 -> - //todo re-adjust GMT time, current default +5:30 for IST - state = if (Util.isNumber(c)) - 3 - else - -1 - 3 -> state = if (Util.isNumber(c)) - 4 - else - -1 - 4 -> state = if (c.toInt() == CH_SPACE) - 5 - else - -2 - 5 -> { - val sy = str.substring(i, i + 4) - if (i + 3 < str.length && Util.isNumber(sy)) { - map.put(DT_YYYY, sy) - i += 3 - } - state = -2 - } - } - i++ - } - val s_ = str.substring(0, i).trim { it <= ' ' } - if (state == 1 && s_.length == 4 && Util.isNumber(s_)) - //we captured a year after IST Mon Sep 04 13:47:13 IST 2017 - map.put(DT_YYYY, s_) - return if (state == -1) 0 else i - } - - private fun skip(str: String): Int { - var i = 0 - while (i < str.length) { - if (str[i] == ' ' || str[i] == ',' || str[i] == '(' || str[i] == ':') - i++ - else - break - } - return i 
- } - - private fun nextSpace(str: String): Int { - var i = 0 - while (i < str.length) { - if (str[i] == ' ') - return i - else - i++ - } - return i - } - - private fun accAmtNumPct(str: String, i: Int, map: FsaContextMap, config: Map): Int { - //acc num amt pct - val c = str[i] - val subStr = str.substring(i) - - val pFSAAmt = Util.checkTypes(root, "FSA_AMT", subStr) - val pFSATimes = Util.checkTypes(root, "FSA_TIMES", subStr) - - if (c.toInt() == CH_FSTP) { //dot - if (i == 0 && config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) - map.setType(TY_AMT, TY_AMT) - map.append(c) - return 10 - } else if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) {//*Xx - map.setType(TY_ACC, TY_ACC) - map.append('X') - return 11 - } else if (c.toInt() == CH_COMA) { //comma - return 12 - } else if (c.toInt() == CH_PCT || c.toInt() == CH_SPACE && i + 1 < str.length && str[i + 1].toInt() == CH_PCT) { //pct - map.setType(TY_PCT, TY_PCT) - return -1 - } else if (c.toInt() == CH_PLUS) { - if (config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) { - return -1 - } - map.setType(TY_STR, TY_STR) - return 36 - } else if (i > 0 && pFSAAmt != null) { - map.index = pFSAAmt.a - map.setType(TY_AMT, TY_AMT) - map.append(getAmt(pFSAAmt.b)) - return 38 - } else if (i > 0 && pFSATimes != null) { - val ind = i + pFSATimes.a - map.index = ind - map.setType(TY_TME, null) - var s = str.substring(0, i) - if (pFSATimes.b == "mins") - s = "00$s" - extractTime(s, map.getValMap()) - return 38 - } else - return -1 - } - - private fun getAmt(type: String): String { - when (type) { - "lakh", "lac" -> return "00000" - "k" -> return "000" - else -> return "" - } - } - - private fun extractTime(str: String, valMap: MutableMap, vararg prefix: String) { - var pre = "" - if (prefix.isNotEmpty()) - pre = prefix[0] + "_" - val pattern = "([0-9]{2})([0-9]{2})?([0-9]{2})?".toRegex() - val m = pattern.find(str) - - m?.let { - val gps = 
it.groups - valMap[pre + "time"] = - gps[1]?.value.toString() + if (it.groups.size > 1 && gps[2] != null) ":" + gps[2]?.value.toString() else ":00" - } - } - - private fun lookAheadForInstr(str: String, index: Int): Int { - var c: Char - for (i in index until str.length) { - c = str[i] - if (c.toInt() == CH_FSTP) { - } else return if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120 || Util.isNumber(c)) - i - else - -1 - } - return -1 - } - - internal class DelimiterStack { - private val stack: MutableList = mutableListOf() - - fun push(ch: Char) { - stack.add(ch) - } - - fun pop(): Char { - return if (stack.isNotEmpty()) { - stack[stack.size - 1] - } else '~' - } - } - -} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/model/State.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/model/State.kt new file mode 100644 index 0000000..7100e31 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/model/State.kt @@ -0,0 +1,18 @@ +package com.twelfthmile.kyuga.model + +import com.twelfthmile.kyuga.Kyuga +import com.twelfthmile.kyuga.types.RootTrie +import com.twelfthmile.kyuga.utils.FsaContextMap + +internal data class StateContext( + val root: RootTrie, + val str: String, + val nextChar: Char, + val contextMap: FsaContextMap, + val index: Int, + val delimiterStack: Kyuga.DelimiterStack, + val config: Map, + val counter: Int +) + +internal data class StateResult(val state: Int, val index: Int, val counter: Int) \ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State13to18.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State13to18.kt new file mode 100644 index 0000000..5c48dd5 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State13to18.kt @@ -0,0 +1,244 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* + +internal fun 
state13( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.append(c) + state = 13 + } else if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) { + //*Xx + contextMap.append('X') + state = 13 + } else if (c.toInt() == CH_FSTP && config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR) { //LIC **150.00 fix + contextMap.setType(TY_AMT, TY_AMT) + contextMap.put(TY_AMT, contextMap[TY_AMT]!!.replace("X".toRegex(), "")) + contextMap.append(c) + state = 10 + } else if (c.toInt() == CH_FSTP && str.lookAheadForInstr(i).let { + if (it > 0) { + i = it + true + } else { + false + } + }) { + // emptiness + state = 13 + } else { + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state14( + context: StateContext +): StateResult { + var state = 14 + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.append(c) + } else if (c.toInt() == CH_PCT) { + contextMap.setType(TY_PCT, TY_PCT) + state = -1 + } else if ((c == 'k' || c == 'c') && i + 1 < str.length && str[i + 1] == 'm') { + contextMap.setType(TY_DST, TY_DST) + i += 1 + state = -1 + } else if ((c == 'k' || c == 'm') && i + 1 < str.length && str[i + 1] == 'g') { + contextMap.setType(TY_WGT, TY_WGT) + i += 1 + state = -1 + } else { + var tempBrk = true + if (c.toInt() == CH_FSTP && i + 1 < str.length && Util.isNumber(str[i + 1])) { + val samt = contextMap[contextMap.type!!] 
+ if (samt!!.contains(".")) { + val samtarr = samt.split("\\.".toRegex()) + if (samtarr.size == 2) { + contextMap.type = TY_DTE + contextMap.put(DT_D, samtarr[0]) + contextMap.put(DT_MM, samtarr[1]) + state = 19 + tempBrk = false + } + } + } + if (tempBrk) { + i -= 1 + state = -1 + } + } + } + return StateResult(state, i, context.counter) +} + +internal fun state15( + context: StateContext +): StateResult { + var state = 15 + var i = context.index + var localCounter = context.counter + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + localCounter++ + contextMap.append(c) + } else if (c.toInt() == CH_COMA) + //comma + state = 12 + else if (c.toInt() == CH_FSTP) { //dot + contextMap.append(c) + state = 10 + } else if ((c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) && i + 1 < str.length && (Util.isNumber( + str[i + 1] + ) || str[i + 1].toInt() == CH_HYPH || str[i + 1].toInt() == 42 || str[i + 1].toInt() == 88 || str[i + 1].toInt() == 120) + ) {//*Xx + contextMap.setType(TY_ACC, TY_ACC) + contextMap.append('X') + state = 11 + } else if (c.toInt() == CH_SPACE && i + 2 < str.length && Util.isNumber(str[i + 1]) && Util.isNumber( + str[i + 2] + ) + ) { + state = 41 + } else { + i -= 1 + state = -1 + }// else if (c == Constants.CH_ATRT) { + // delimiterStack.push(c); + // state = 43; + // } + } + return StateResult(state, i, context.counter) +} + +internal fun state16( + context: StateContext +): StateResult { + var state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.upgrade(c) + state = 17 + } else if (c.toInt() == CH_SPACE || c.toInt() == CH_COMA) + state = 16 + else if (Util.checkTypes(root, "FSA_MONTHS", str.substring(i))?.let { + contextMap.put(DT_MMM, it.second) + i += it.first + true + } == true) { + state = 24 + } else if (c.toInt() == CH_FSTP) { //dot + contextMap.setType(TY_NUM, TY_NUM) + contextMap.append(c) + state = 10 + } else if (i > 0 && Util.checkTypes(root, "FSA_TIMES", 
str.substring(i))?.let { + contextMap.setType(TY_TME, null) + var s = str.substring(0, i) + if (it.second == "mins" || it.second == "minutes") + s = "00$s" + s.extractTime(contextMap.getValMap()) + i += it.first + true + } == true) { + state = -1 + } else {//this is just a number, not a date + //to cater to 16 -Nov -17 + if (delimiterStack.pop() + .toInt() == CH_SPACE && c.toInt() == CH_HYPH && i + 1 < str.length && (Util.isNumber( + str[i + 1] + ) || Util.checkTypes(root, "FSA_MONTHS", str.substring(i + 1)) != null) + ) { + state = 16 + } else { + contextMap.setType(TY_NUM, TY_NUM) + var j = i + while (!Util.isNumber(str[j])) + j-- + i = j + state = -1 + } + } + } + return StateResult(state, i, context.counter) +} + +internal fun state17( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.append(c) + state = 18 + } else if (Util.isDateOperator(c)) { + delimiterStack.push(c) + state = 19 + } else if (c.toInt() == CH_COMA && delimiterStack.pop().toInt() == CH_COMA) { //comma + contextMap.setType(TY_NUM, TY_NUM) + state = 12 + } else if (c.toInt() == CH_FSTP && delimiterStack.pop().toInt() == CH_COMA) { //dot + contextMap.setType(TY_NUM, TY_NUM) + contextMap.append(c) + state = 10 + } else { + contextMap.setType(TY_STR, TY_STR) + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state18( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isDateOperator(c)) { + delimiterStack.push(c) + state = 19 + } else if (Util.isNumber(c) && delimiterStack.pop().toInt() == CH_COMA) { + contextMap.setType(TY_NUM, TY_NUM) + state = 12 + contextMap.append(c) + } else if (Util.isNumber(c) && delimiterStack.pop().toInt() == CH_HYPH) { + contextMap.setType(TY_NUM, TY_NUM) + state = 42 + contextMap.append(c) + } else if (c.toInt() == CH_COMA && 
delimiterStack.pop().toInt() == CH_COMA) { //comma + contextMap.setType(TY_NUM, TY_NUM) + state = 12 + } else if (c.toInt() == CH_FSTP && delimiterStack.pop().toInt() == CH_COMA) { //dot + contextMap.setType(TY_NUM, TY_NUM) + contextMap.append(c) + state = 10 + } else if (c.toInt() == CH_FSTP && contextMap.contains(DT_D) && contextMap.contains(DT_MM)) { //dot + state = -1 + } else { + contextMap.setType(TY_STR, TY_STR) + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State19to24.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State19to24.kt new file mode 100644 index 0000000..b55ba64 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State19to24.kt @@ -0,0 +1,124 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* + +internal fun state19( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.upgrade(c) + state = 20 + } else { + i -= 2 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state20( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + when { + Util.isNumber(c) -> { + contextMap.append(c) + state = 21 + } + c == ':' -> { + if (contextMap.contains(DT_YY)) + contextMap.convert(DT_YY, DT_HH) + else if (contextMap.contains(DT_YYYY)) + contextMap.convert(DT_YYYY, DT_HH) + state = 4 + } + else -> { + contextMap.remove(DT_YY)//since there is no one number year + i -= 1 + state = -1 + } + } + } + return StateResult(state, i, context.counter) +} + +internal fun state21( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = 
nextChar + when { + Util.isNumber(c) -> { + contextMap.upgrade(c) + state = 22 + } + c == ':' -> { + if (contextMap.contains(DT_YY)) + contextMap.convert(DT_YY, DT_HH) + else if (contextMap.contains(DT_YYYY)) + contextMap.convert(DT_YYYY, DT_HH) + state = 4 + } + else -> { + i -= 1 + state = -1 + } + } + } + return StateResult(state, i, context.counter) +} + +internal fun state22( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.append(c) + state = -1 + } else { + contextMap.remove(DT_YYYY)//since there is no three number year + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state24( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isDateOperator(c) || c.toInt() == CH_COMA) { + delimiterStack.push(c) + state = 24 + } else if (Util.isNumber(c)) { + contextMap.upgrade(c) + state = 20 + } else if (c.toInt() == CH_SQOT && i + 1 < str.length && Util.isNumber(str[i + 1])) { + state = 24 + } else if (c == '|') { + state = 24 + } else { + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State1to6.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State1to6.kt new file mode 100644 index 0000000..01ac5f2 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State1to6.kt @@ -0,0 +1,195 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.Kyuga +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* +import com.twelfthmile.kyuga.utils.accAmtNumPct + +internal fun state0(context: StateContext): StateResult { + return StateResult(0, context.index, context.counter) +} +internal fun state1( + context: 
StateContext +): StateResult? { + val state: Int + var i = context.index + val isMonth by lazy { Util.checkTypes(context.root, "FSA_MONTHS", + context.str.substring(i)) } + val isDay by lazy { Util.checkTypes(context.root, "FSA_DAYS", + context.str.substring(i)) } + with(context) { + when { + Util.isNumber(context.nextChar) -> { + contextMap.setType(TY_NUM, null) + contextMap.put(TY_NUM, nextChar) + state = 2 + } + isMonth != null -> { + contextMap.setType(TY_DTE, null) + isMonth?.let { + contextMap.put(DT_MMM, it.second) + i += it.first + } + state = 33 + } + isDay != null -> { + contextMap.setType(TY_DTE, null) + isDay?.let { + contextMap.put(DT_DD, it.second) + i += it.first + } + state = 30 + } + nextChar.toInt() == CH_HYPH -> state = 37 + nextChar.toInt() == CH_LSBT -> state = 1 + else -> { + state = str.accAmtNumPct(root, i, contextMap, Kyuga.generateDefaultConfig()) + if (contextMap.type == null) + return null + if (state == -1 && contextMap.type != TY_PCT) { + i -= 1 + } + } + } + } + return StateResult(state, i, context.counter) +} + +internal fun state2( + context: StateContext +): StateResult { + var i = context.index + val state: Int + with(context) { + if (Util.isNumber(nextChar)) { + contextMap.append(nextChar) + state = 3 + } else if (Util.isTimeOperator(nextChar)) { + delimiterStack.push(nextChar) + contextMap.setType(TY_DTE, DT_HH) + state = 4 + } else if (Util.isDateOperator(nextChar) || nextChar.toInt() == CH_COMA) { + delimiterStack.push(nextChar) + contextMap.setType(TY_DTE, DT_D) + state = 16 + } else if (Util.checkTypes(root, "FSA_MONTHS", str.substring(i))?.let { + contextMap.setType(TY_DTE, DT_D) + contextMap.put(DT_MMM, it.second) + i += it.first + true + } == true) { + state = 24 + } else { + state = str.accAmtNumPct(root, i, contextMap, config) + if (state == -1 && contextMap.type != TY_PCT) + i -= 1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state3( + context: StateContext +): StateResult { + val state: 
Int + var i = context.index + with(context) { + if (Util.isNumber(nextChar)) { + contextMap.append(nextChar) + state = 8 + } else if (Util.isTimeOperator(nextChar)) { + delimiterStack.push(nextChar) + contextMap.setType(TY_DTE, DT_HH) + state = 4 + } else if (Util.isDateOperator(nextChar) || nextChar.toInt() == CH_COMA) { + delimiterStack.push(nextChar) + contextMap.setType(TY_DTE, DT_D) + state = 16 + } else if (Util.checkTypes(root, "FSA_MONTHS", str.substring(i))?.let { + contextMap.setType(TY_DTE, DT_D) + contextMap.put(DT_MMM, it.second) + i += it.first + true + } == true) { + state = 24 + } else if (Util.checkTypes(root, "FSA_DAYSFFX", str.substring(i))?.let { + contextMap.setType(TY_DTE, DT_D) + i += it.first + true + } == true) { + state = 32 + } else { + state = str.accAmtNumPct(root, i, contextMap, config) + if (state == -1 && contextMap.type != TY_PCT) + i -= 1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state4( + context: StateContext +): StateResult { + var i = context.index + val state: Int + with(context) { + if (Util.isNumber(nextChar)) { + contextMap.upgrade(nextChar)//hh to mm + state = 5 + } else { //saw a colon randomly, switch back to num from hours + if (!contextMap.contains(DT_MMM)) + contextMap.setType(TY_NUM, TY_NUM) + i -= 2 //move back so that colon is omitted + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state5( + context: StateContext +): StateResult { + var i = context.index + val state: Int + with(context) { + if (Util.isNumber(nextChar)) { + contextMap.append(nextChar) + state = 5 + } else if (nextChar.toInt() == CH_COLN) + state = 6 + else if (nextChar == 'a' && i + 1 < str.length && str[i + 1] == 'm') { + i += 1 + state = -1 + } else if (nextChar == 'p' && i + 1 < str.length && str[i + 1] == 'm') { + contextMap.put(DT_HH, (contextMap[DT_HH]!!.toInt() + 12).toString()) + i += 1 + state = -1 + } else if (Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let 
{ + i += it.first + true + } == true) { + state = -1 + } else + state = 7 + } + return StateResult(state, i, context.counter) +} + +internal fun state6( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + if (Util.isNumber(nextChar)) { + contextMap.upgrade(nextChar) + if (i + 1 < str.length && Util.isNumber(str[i + 1])) + contextMap.append(str[i + 1]) + i += 1 + state = -1 + } else + state = -1 + } + return StateResult(state, i, context.counter) +} diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State25to30.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State25to30.kt new file mode 100644 index 0000000..df094ef --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State25to30.kt @@ -0,0 +1,152 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* + +internal fun state25( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.setType(TY_DTE, DT_YYYY) + contextMap.put(DT_MM, c) + state = 26 + } else if (i > 0 && Util.checkTypes(root, "FSA_TIMES", str.substring(i))?.let { + contextMap.setType(TY_TME, null) + var s = str.substring(0, i) + if (it.second == "mins") + s = "00$s" + s.extractTime(contextMap.getValMap()) + i += it.first + true + } == true) { + state = -1 + } else { + //it wasn't year, it was just a number + i -= 2 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state26( + context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isNumber(c)) { + contextMap.append(c) + state = 27 + } else { + contextMap.setType(TY_STR, TY_STR) + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} + +internal fun state27( + 
context: StateContext +): StateResult { + val state: Int + var i = context.index + with(context) { + val c = nextChar + if (Util.isDateOperator(c)) { + delimiterStack.push(c) + state = 28 + } else if (Util.isNumber(c)) {//it was a number, most probably telephone number + if (contextMap.type == TY_DTE) { + contextMap.setType(TY_NUM, TY_NUM) + } + contextMap.append(c) + if ((delimiterStack.pop().toInt() == CH_SLSH || delimiterStack.pop() + .toInt() == CH_HYPH) && i + 1 < str.length && Util.isNumber( + str[i + 1] + ) && (i + 2 == str.length || Util.isDelimiter(str[i + 2])) + ) {//flight time 0820/0950 + contextMap.setType(TY_TMS, TY_TMS) + contextMap.append(str[i + 1]) + i += 1 + state = -1 + } else if (delimiterStack.pop().toInt() == CH_SPACE) { + state = 41 + } else + state = 12 + } else if (c.toInt() == 42 || c.toInt() == 88 || c.toInt() == 120) {//*Xx + contextMap.setType(TY_ACC, TY_ACC) + contextMap.append('X') + state = 11 + } else { + contextMap.setType(TY_STR, TY_STR) + i -= 1 + state = -1 + } + } + return StateResult(state, i, context.counter) +} +

/**
 * FSA state 28.
 *
 * A digit is stored under [DT_D] and the machine moves to state 29. Any other character
 * re-types the match as [TY_STR], moves the cursor back two positions and yields state -1.
 *
 * @param context current parser snapshot (input string, cursor, accumulated values).
 * @return the next state, the adjusted cursor and the unchanged counter.
 */
internal fun state28(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        if (Util.isNumber(ch)) {
            contextMap.put(DT_D, ch)
            29
        } else {
            contextMap.setType(TY_STR, TY_STR)
            cursor -= 2
            -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 29.
 *
 * Always yields state -1. A digit is appended to the value being built; for any other
 * character the cursor is moved back by one so that character is not consumed.
 */
internal fun state29(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        if (Util.isNumber(ch)) contextMap.append(ch) else cursor -= 1
        -1
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 30.
 *
 * Commas and spaces keep the machine in state 30. A digit is stored under [DT_D] and leads
 * to state 31. Anything else sets the type to [TY_DTE], steps back one character and
 * yields state -1.
 */
internal fun state30(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            code == CH_COMA || code == CH_SPACE -> 30
            Util.isNumber(ch) -> {
                contextMap.put(DT_D, ch)
                31
            }
            else -> {
                contextMap.type = TY_DTE
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State31to36.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State31to36.kt new file mode 100644 index 0000000..7943ee7 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State31to36.kt @@ -0,0 +1,158 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* +

/**
 * FSA state 31.
 *
 * - digit: appended, next state 32;
 * - an "FSA_MONTHS" token starting at the cursor: stored under [DT_MMM], the cursor is
 *   advanced by the offset returned from [Util.checkTypes], next state 24;
 * - comma or space: next state 32;
 * - anything else: step back one character, state -1.
 */
internal fun state31(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        if (Util.isNumber(ch)) {
            contextMap.append(ch)
            32
        } else {
            // The month lookup is only attempted once the digit case has been ruled out.
            val month = Util.checkTypes(root, "FSA_MONTHS", str.substring(cursor))
            when {
                month != null -> {
                    contextMap.put(DT_MMM, month.second)
                    cursor += month.first
                    24
                }
                ch.toInt() == CH_COMA || ch.toInt() == CH_SPACE -> 32
                else -> {
                    cursor -= 1
                    -1
                }
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 32.
 *
 * Tries, in order: an "FSA_MONTHS" token (stored under [DT_MMM], next state 24), a comma or
 * space (stay in 32), an "FSA_DAYSFFX" token (skipped, stay in 32). If none applies, the
 * cursor is walked back to the nearest digit and state -1 is returned.
 */
internal fun state32(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val month = Util.checkTypes(root, "FSA_MONTHS", str.substring(cursor))
        if (month != null) {
            contextMap.put(DT_MMM, month.second)
            cursor += month.first
            24
        } else if (ch.toInt() == CH_COMA || ch.toInt() == CH_SPACE) {
            32
        } else {
            val suffix = Util.checkTypes(root, "FSA_DAYSFFX", str.substring(cursor))
            if (suffix != null) {
                cursor += suffix.first
                32
            } else {
                // NOTE(review): the backward scan has no lower bound; it relies on a digit
                // existing at or before the cursor.
                while (!Util.isNumber(str[cursor])) {
                    cursor--
                }
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 33.
 *
 * A digit is stored under [DT_D] (next state 34); space, comma and hyphen are skipped
 * (stay in 33); anything else sets the type to [TY_DTE], steps back one character and
 * yields state -1.
 */
internal fun state33(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            Util.isNumber(ch) -> {
                contextMap.put(DT_D, ch)
                34
            }
            code == CH_SPACE || code == CH_COMA || code == CH_HYPH -> 33
            else -> {
                contextMap.type = TY_DTE
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 34.
 *
 * A digit is appended and a space or comma is skipped, both leading to state 35; anything
 * else sets the type to [TY_DTE], steps back one character and yields state -1.
 */
internal fun state34(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            Util.isNumber(ch) -> {
                contextMap.append(ch)
                35
            }
            code == CH_SPACE || code == CH_COMA -> 35
            else -> {
                contextMap.type = TY_DTE
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 35.
 *
 * On a digit: when the previous character is also a digit the [DT_D] entry is converted to
 * [DT_YYYY] and the digit appended, otherwise the digit is stored under [DT_YY]; next
 * state 20. Space or comma leads to state 40. Anything else sets the type to [TY_DTE],
 * steps back one character and yields state -1.
 */
internal fun state35(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            Util.isNumber(ch) -> {
                if (cursor > 1 && Util.isNumber(str[cursor - 1])) {
                    contextMap.convert(DT_D, DT_YYYY)
                    contextMap.append(ch)
                } else {
                    contextMap.put(DT_YY, ch)
                }
                20
            }
            code == CH_SPACE || code == CH_COMA -> 40
            else -> {
                contextMap.type = TY_DTE
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 36.
 *
 * - digit: appended, the returned counter is incremented, stay in 36;
 * - full stop followed by a digit: appended, next state 10;
 * - hyphen followed by a digit: pushed on the delimiter stack and appended, next state 16;
 * - otherwise: if the context counter is 12 or the text between index 1 and the cursor is
 *   numeric, the match is typed [TY_NUM] and state -1 is returned.
 *
 * @return the transition, or `null` when none of the cases above applies.
 */
internal fun state36(
    context: StateContext
): StateResult? {
    val cursor = context.index
    var digitCount = context.counter
    val next = with(context) {
        val ch = nextChar
        val digitFollows = { cursor + 1 < str.length && Util.isNumber(str[cursor + 1]) }
        when {
            Util.isNumber(ch) -> {
                contextMap.append(ch)
                digitCount++
                36
            }
            ch.toInt() == CH_FSTP && digitFollows() -> {
                contextMap.append(ch)
                10
            }
            ch.toInt() == CH_HYPH && digitFollows() -> {
                delimiterStack.push(ch)
                contextMap.append(ch)
                16
            }
            counter == 12 || Util.isNumber(str.substring(1, cursor)) -> {
                contextMap.setType(TY_NUM, TY_NUM)
                -1
            }
            else -> return null
        }
    }
    return StateResult(next, cursor, digitCount)
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State37to42.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State37to42.kt new file mode 100644 index 0000000..be05dfe --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State37to42.kt @@ -0,0 +1,123 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* +

/**
 * FSA state 37.
 *
 * A digit types the match as [TY_AMT], stores '-' under that key and appends the digit
 * (next state 12). A full stop stores '-' and appends the dot (next state 10). Anything
 * else yields state -1. The cursor is never moved here.
 */
internal fun state37(
    context: StateContext
): StateResult {
    val cursor = context.index
    val next: Int
    with(context) {
        val ch = nextChar
        if (Util.isNumber(ch)) {
            contextMap.setType(TY_AMT, TY_AMT)
            contextMap.put(TY_AMT, '-')
            contextMap.append(ch)
            next = 12
        } else if (ch.toInt() == CH_FSTP) {
            contextMap.put(TY_AMT, '-')
            contextMap.append(ch)
            next = 10
        } else {
            next = -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 38.
 *
 * Jumps the cursor to the index recorded on the context map and yields state -1.
 * The `!!` throws if no index was recorded.
 */
internal fun state38(
    context: StateContext
): StateResult {
    val resumeAt = with(context) { contextMap.index!! }
    return StateResult(-1, resumeAt, context.counter)
}

/**
 * FSA state 39.
 *
 * Digits are appended and keep the machine in 39; the first non-digit types the match as
 * [TY_ACC] and yields state -1. The cursor is left untouched.
 */
internal fun state39(
    context: StateContext
): StateResult {
    val cursor = context.index
    val next = context.run {
        val ch = nextChar
        if (Util.isNumber(ch)) {
            contextMap.append(ch)
            39
        } else {
            contextMap.setType(TY_ACC, TY_ACC)
            -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 40.
 *
 * A digit is stored under [DT_YY] (next state 20); spaces and commas are skipped (stay in
 * 40); anything else sets the type to [TY_DTE], steps back one character and yields -1.
 */
internal fun state40(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            Util.isNumber(ch) -> {
                contextMap.put(DT_YY, ch)
                20
            }
            code == CH_SPACE || code == CH_COMA -> 40
            else -> {
                contextMap.type = TY_DTE
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 41.
 *
 * Digits are appended and spaces skipped, both staying in 41. On any other character the
 * cursor steps back one position, or two when the previous character is a space (and is
 * not at index 0), and state -1 is returned.
 */
internal fun state41(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        if (Util.isNumber(ch)) {
            contextMap.append(ch)
            41
        } else if (ch.toInt() == CH_SPACE) {
            41
        } else {
            val afterSpace = cursor - 1 > 0 && str[cursor - 1].toInt() == CH_SPACE
            cursor -= if (afterSpace) 2 else 1
            -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 42.
 *
 * Digits are appended (stay in 42); a hyphen directly followed by a digit leads to state
 * 39; anything else steps back one character and yields state -1.
 */
internal fun state42(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        when {
            Util.isNumber(ch) -> {
                contextMap.append(ch)
                42
            }
            ch.toInt() == CH_HYPH && cursor + 1 < str.length && Util.isNumber(str[cursor + 1]) -> 39
            else -> {
                cursor -= 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State43to45.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State43to45.kt new file mode 100644 index 0000000..95f4888 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State43to45.kt @@ -0,0 +1,62 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import 
com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* +

/**
 * FSA state 43.
 *
 * A lower-case letter or digit types the match as [TY_VPD], appends the delimiter popped
 * from the delimiter stack followed by the character, and moves to state 44. Anything
 * else yields state -1. The cursor is not moved.
 */
internal fun state43(
    context: StateContext
): StateResult {
    val cursor = context.index
    val next: Int
    with(context) {
        val ch = nextChar
        if (Util.isLowerAlpha(ch) || Util.isNumber(ch)) {
            contextMap.setType(TY_VPD, TY_VPD)
            contextMap.append(delimiterStack.pop())
            contextMap.append(ch)
            next = 44
        } else {
            next = -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 44.
 *
 * Lower-case letters, digits and full stops are appended and keep the machine in 44;
 * anything else yields state -1. The cursor is not moved.
 */
internal fun state44(
    context: StateContext
): StateResult {
    val cursor = context.index
    val next: Int
    with(context) {
        val ch = nextChar
        val accepted = Util.isLowerAlpha(ch) || Util.isNumber(ch) || ch.toInt() == CH_FSTP
        if (accepted) {
            contextMap.append(ch)
            next = 44
        } else {
            next = -1
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 45.
 *
 * Digits are appended (stay in 45); a hyphen directly followed by a digit leads to state
 * 39. Otherwise the cursor steps back one position, or two when the previous character is
 * a comma (and is not at index 0), and state -1 is returned.
 */
internal fun state45(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        when {
            Util.isNumber(ch) -> {
                contextMap.append(ch)
                45
            }
            ch.toInt() == CH_HYPH && cursor + 1 < str.length && Util.isNumber(str[cursor + 1]) -> 39
            else -> {
                val afterComma = cursor - 1 > 0 && str[cursor - 1].toInt() == CH_COMA
                cursor -= if (afterComma) 2 else 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State7to12.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State7to12.kt new file mode 100644 index 0000000..bd2d8c5 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/states/State7to12.kt @@ -0,0 +1,167 @@ +package com.twelfthmile.kyuga.states + +import com.twelfthmile.kyuga.model.StateContext +import com.twelfthmile.kyuga.model.StateResult +import com.twelfthmile.kyuga.utils.* +

/**
 * FSA state 7. Always yields state -1.
 *
 * - "am": consumes the 'm'; an hour of 12 stored under [DT_HH] is rewritten to 0;
 * - "pm": an hour other than 12 gets 12 added; consumes the 'm';
 * - an "FSA_TIMES" token at the cursor: the cursor is advanced by the returned offset;
 * - anything else: the cursor steps back two positions.
 *
 * The `!!` on the [DT_HH] lookup throws if no hour has been stored yet.
 */
internal fun state7(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = with(context) {
        when {
            nextChar == 'a' && cursor + 1 < str.length && str[cursor + 1] == 'm' -> {
                cursor += 1
                val hour = contextMap[DT_HH]!!.toInt()
                if (hour == 12) {
                    contextMap.put(DT_HH, 0.toString())
                }
            }
            nextChar == 'p' && cursor + 1 < str.length && str[cursor + 1] == 'm' -> {
                val hour = contextMap[DT_HH]!!.toInt()
                if (hour != 12) {
                    contextMap.put(DT_HH, (hour + 12).toString())
                }
                cursor += 1
            }
            else -> {
                val unit = Util.checkTypes(root, "FSA_TIMES", str.substring(cursor))
                if (unit != null) {
                    cursor += unit.first
                } else {
                    cursor -= 2
                }
            }
        }
        -1
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 8.
 *
 * A digit is appended and leads to state 9. Otherwise the character is classified by
 * [accAmtNumPct]. When that returns -1, a space followed by a digit leads to state 12 and
 * a hyphen followed by a digit to state 45; failing both, the cursor steps back one
 * position unless the match was typed [TY_PCT].
 */
internal fun state8(
    context: StateContext
):StateResult {
    var cursor = context.index
    val next = with(context) {
        if (Util.isNumber(nextChar)) {
            contextMap.append(nextChar)
            9
        } else {
            val classified = str.accAmtNumPct(root, cursor, contextMap, config)
            val unresolved = classified == -1
            val digitFollows = { cursor + 1 < str.length && Util.isNumber(str[cursor + 1]) }
            when {
                nextChar.toInt() == CH_SPACE && unresolved && digitFollows() -> 12
                nextChar.toInt() == CH_HYPH && unresolved && digitFollows() -> 45
                else -> {
                    if (unresolved && contextMap.type != TY_PCT) {
                        cursor -= 1
                    }
                    classified
                }
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 9.
 *
 * A date operator is pushed on the delimiter stack (next state 25). A digit is appended,
 * the returned counter is set to 5 and the machine moves to state 15. Anything else is
 * classified by [accAmtNumPct]; when that returns -1 and the match is not [TY_PCT], the
 * cursor steps back one position.
 */
internal fun state9(
    context: StateContext
): StateResult {
    var digitCount = context.counter
    var cursor = context.index
    val next = with(context) {
        when {
            Util.isDateOperator(nextChar) -> {
                delimiterStack.push(nextChar)
                25
            }
            Util.isNumber(nextChar) -> {
                contextMap.append(nextChar)
                digitCount = 5
                15
            }
            else -> {
                val classified = str.accAmtNumPct(root, cursor, contextMap, config)
                if (classified == -1 && contextMap.type != TY_PCT) {
                    // plain-number case: give the character back
                    cursor -= 1
                }
                classified
            }
        }
    }
    return StateResult(next, cursor, digitCount)
}

/**
 * FSA state 10 (entered right after a full stop was appended).
 *
 * A digit is appended and the match typed [TY_AMT] (next state 14). Otherwise the dot that
 * was appended earlier is popped again, the cursor steps back two positions and state -1
 * is returned.
 */
internal fun state10(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = with(context) {
        if (!Util.isNumber(nextChar)) {
            // stray full stop: drop the dot that was appended before entering this state
            contextMap.pop()
            cursor -= 2
            -1
        } else {
            contextMap.append(nextChar)
            contextMap.setType(TY_AMT, TY_AMT)
            14
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 11.
 *
 * - '*', 'X' or 'x': an 'X' is appended, stay in 11;
 * - hyphen: skipped, stay in 11;
 * - digit: appended, next state 13;
 * - space followed by '*', 'X', 'x' or a digit: skipped, stay in 11;
 * - full stop for which [lookAheadForInstr] returns a positive index: the cursor jumps to
 *   that index, stay in 11;
 * - anything else: step back one character, state -1.
 */
internal fun state11(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = with(context) {
        when {
            nextChar == '*' || nextChar == 'X' || nextChar == 'x' -> {
                contextMap.append('X')
                11
            }
            nextChar.toInt() == CH_HYPH -> 11
            Util.isNumber(nextChar) -> {
                contextMap.append(nextChar)
                13
            }
            nextChar == ' ' && cursor + 1 < str.length &&
                str[cursor + 1].let { it == '*' || it == 'X' || it == 'x' || Util.isNumber(it) } -> 11
            else -> {
                val target = if (nextChar.toInt() == CH_FSTP) str.lookAheadForInstr(cursor) else -1
                if (target > 0) {
                    cursor = target
                    11
                } else {
                    cursor -= 1
                    -1
                }
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}

/**
 * FSA state 12.
 *
 * - digit: the match is typed [TY_AMT] and the digit appended, stay in 12;
 * - comma: skipped, stay in 12;
 * - full stop: appended, next state 10;
 * - hyphen directly followed by a digit: next state 39;
 * - anything else: the cursor steps back one position, or two when the previous character
 *   is a comma (and is not at index 0); state -1.
 */
internal fun state12(
    context: StateContext
): StateResult {
    var cursor = context.index
    val next = context.run {
        val ch = nextChar
        val code = ch.toInt()
        when {
            Util.isNumber(ch) -> {
                contextMap.setType(TY_AMT, TY_AMT)
                contextMap.append(ch)
                12
            }
            code == CH_COMA -> 12
            code == CH_FSTP -> {
                contextMap.append(ch)
                10
            }
            code == CH_HYPH && cursor + 1 < str.length && Util.isNumber(str[cursor + 1]) -> 39
            else -> {
                val afterComma = cursor - 1 > 0 && str[cursor - 1].toInt() == CH_COMA
                cursor -= if (afterComma) 2 else 1
                -1
            }
        }
    }
    return StateResult(next, cursor, context.counter)
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/types/GenTrie.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/types/GenTrie.kt deleted file mode 100644 index 513aaa1..0000000 --- a/src/commonMain/kotlin/com/twelfthmile/kyuga/types/GenTrie.kt +++ /dev/null @@ -1,17 +0,0 @@ -package com.twelfthmile.kyuga.types - -class GenTrie { - - var leaf = false - var child = false - val next: MutableMap = mutableMapOf() - var token: String? 
= null - -} - -class RootTrie { - val next: MutableMap = mutableMapOf() -} - - -data class Pair(val a: A, val b: B) \ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/types/KyugaTrie.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/types/KyugaTrie.kt new file mode 100644 index 0000000..ebb1805 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/types/KyugaTrie.kt @@ -0,0 +1,44 @@ +package com.twelfthmile.kyuga.types + +import com.twelfthmile.kyuga.utils.FSA_TYPES +

/**
 * Builds the read-only keyword trie used by the FSA.
 *
 * Every entry of [FSA_TYPES] (type name to comma-separated token list) gets its own
 * sub-trie under [root], keyed by the type name.
 */
class KyugaTrie {

    /** Immutable snapshot of the seeded trie, created once in `init`. */
    val root: RootTrie

    init {
        val mutableRoot = MutableRootTrie()
        FSA_TYPES.forEach { (type, tokens) ->
            seed(tokens, mutableRoot.next.getOrPut(type) { MutableGenTrie() })
        }
        root = mutableRoot.toRootTrie()
    }

    /**
     * Inserts every token of the comma-separated list [type] below [mutableTrie].
     *
     * A ';' inside a token marks an early stop: the node reached just before it becomes a
     * leaf, and the ';' itself is not stored as a trie edge. Every leaf of a token carries
     * the token with all ';' removed, so e.g. "jan;uary" yields leaves at "jan" and at
     * "january", both holding "january".
     */
    private fun seed(type: String, mutableTrie: MutableGenTrie) {
        type.split(",").forEach { token ->
            val word = token.replace(";", "")
            var node = mutableTrie
            var i = 0
            while (i < token.length) {
                node.child = true
                node = node.next.getOrPut(token[i]) { MutableGenTrie() }
                val isLast = i == token.length - 1
                val beforeStop = !isLast && token[i + 1] == ';'
                if (isLast || beforeStop) {
                    node.leaf = true
                    node.token = word
                }
                if (beforeStop) {
                    i++ // skip the ';' marker
                }
                i++
            }
        }
    }
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/types/MutableGenTrie.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/types/MutableGenTrie.kt new file mode 100644 index 0000000..d4b1928 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/types/MutableGenTrie.kt @@ -0,0 +1,35 @@ +package com.twelfthmile.kyuga.types +

// NOTE(review): the generic type arguments below were missing from the extracted text and
// have been restored from usage (Char edges inside a trie, String type names at the root).
// Confirm against the repository.

/**
 * Read-only trie node.
 *
 * @property leaf  `true` when a token (or an early-stop prefix of one) ends at this node.
 * @property child `true` when the node has outgoing edges.
 * @property next  outgoing edges keyed by character.
 * @property token token stored on leaf nodes, `null` elsewhere.
 */
data class GenTrie(
    val leaf: Boolean = false,
    val child: Boolean = false,
    val next: Map<Char, GenTrie> = emptyMap(),
    val token: String? = null
)

/** Mutable counterpart of [GenTrie], used only while the trie is being seeded. */
open class MutableGenTrie(
    var leaf: Boolean = false,
    var child: Boolean = false,
    val next: MutableMap<Char, MutableGenTrie> = mutableMapOf(),
    var token: String? = null
)

/** Read-only trie root: one [GenTrie] per FSA type name. */
class RootTrie(val next: Map<String, GenTrie>)

/** Mutable trie root that is filled first and then frozen with [toRootTrie]. */
class MutableRootTrie {
    val next: MutableMap<String, MutableGenTrie> = mutableMapOf()
}

/** Recursively copies this node and everything below it into read-only [GenTrie] nodes. */
internal fun MutableGenTrie.toGenTrie(): GenTrie =
    GenTrie(leaf, child, next.toUnMutableGenTrie(), token)

/** Freezes the child edges of a trie node. */
internal fun MutableMap<Char, MutableGenTrie>.toUnMutableGenTrie(): Map<Char, GenTrie> =
    mapValues { (_, node) -> node.toGenTrie() }

/** Freezes the per-type entries of the trie root. */
internal fun MutableMap<String, MutableGenTrie>.toUnMutableRootTrie(): Map<String, GenTrie> =
    mapValues { (_, node) -> node.toGenTrie() }

/** Produces the read-only [RootTrie] for this mutable root. */
internal fun MutableRootTrie.toRootTrie() = RootTrie(this.next.toUnMutableRootTrie())
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/FsaContextMap.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/FsaContextMap.kt index f9aa40a..315d888 100644 --- a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/FsaContextMap.kt +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/FsaContextMap.kt @@ -1,11 +1,9 @@ package com.twelfthmile.kyuga.utils import com.twelfthmile.kyuga.expectations.* -import com.twelfthmile.kyuga.utils.* class FsaContextMap { - //todo change to private private var map: MutableMap = mutableMapOf() private val valMap = mutableMapOf() private var prevKey: String? = null @@ -39,13 +37,6 @@ class FsaContextMap { prevKey = key } - fun put(key: String, value: Int) { - if (!keys.contains(key)) - keys.add(key) - map[key] = value.toString() - prevKey = key - } - fun put(key: String?, value: String) { if (!keys.contains(key)) keys.add(key) @@ -59,10 +50,6 @@ class FsaContextMap { convert(convertType) } - fun getVal(name: String): String? 
{ - return valMap[name] - } - fun setVal(name: String, `val`: String) { valMap[name] = `val` } @@ -149,7 +136,8 @@ class FsaContextMap { } fun putAll(fsaContextMap: FsaContextMap) { -// map.putAll(fsaContextMap.map) + // Put all had a bug in kotlin multi-platform + // map.putAll(fsaContextMap.map) fsaContextMap.map.forEach { map[it.key] = it.value } @@ -239,10 +227,7 @@ class FsaContextMap { if (invalidDateContributors.size > 0) { return if (invalidDateContributors.size == 1 && invalidDateContributors[0] == DT_MM && ifDay && ifYear) { val format = DT_D + "/" + DT_MM + "/" + if (map.containsKey(DT_YY)) DT_YY else DT_YYYY - val value = map[DT_MM] + "/" + map[DT_D] + "/" + if (map.containsKey( - DT_YY - ) - ) map[DT_YY] else map[DT_YYYY] + val value = map[DT_MM] + "/" + map[DT_D] + "/" + if (map.containsKey(DT_YY)) map[DT_YY] else map[DT_YYYY] formatDateByFormat(value, format) } else null diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/KYugaConstants.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/KYugaConstants.kt index 4ba7735..fab34fa 100644 --- a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/KYugaConstants.kt +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/KYugaConstants.kt @@ -31,14 +31,16 @@ const val TY_OTP = "OTP" const val TY_VPD = "VPD" //VPA-ID //public static final String TY_DCT = "DCT"; //date context like sunday,today,tomorrow -const val FSA_MONTHS = "jan;uary,feb;ruary,mar;ch,apr;il,may,jun;e,jul;y,aug;ust,sep;t;ember,oct;ober,nov;ember,dec;ember" -const val FSA_DAYS = "sun;day,mon;day,tue;sday,wed;nesday,thu;rsday,thur;sday,fri;day,sat;urday" -const val FSA_TIMEPRFX = "at,on,before,by" -const val FSA_AMT = "lac,lakh,k" -const val FSA_TIMES = "hours,hrs,hr,mins,minutes" -const val FSA_TZ = "gmt,ist" -const val FSA_DAYSFFX = "st,nd,rd,th" -const val FSA_UPI = "UPI,MMT,NEFT" +val FSA_TYPES = listOf( + Pair("FSA_MONTHS", "jan;uary,feb;ruary,mar;ch,apr;il,may,jun;e,jul;y,aug;ust,sep;t;ember,oct;ober,nov;ember,dec;ember"), + 
Pair("FSA_DAYS", "sun;day,mon;day,tue;sday,wed;nesday,thu;rsday,thur;sday,fri;day,sat;urday"), + Pair("FSA_TIMEPRFX", "at,on,before,by"), + Pair("FSA_AMT", "lac,lakh,k"), + Pair("FSA_TIMES", "hours,hrs,hr,mins,minutes"), + Pair("FSA_TZ", "gmt,ist"), + Pair("FSA_DAYSFFX", "st,nd,rd,th"), + Pair("FSA_UPI", "UPI,MMT,NEFT") +) const val CH_SPACE = 32 const val CH_PCT = 37 diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/StringExtensions.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/StringExtensions.kt new file mode 100644 index 0000000..3e8cac3 --- /dev/null +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/StringExtensions.kt @@ -0,0 +1,90 @@ +package com.twelfthmile.kyuga.utils + +import com.twelfthmile.kyuga.types.RootTrie +

/** Two mandatory digits followed by up to two optional two-digit groups. */
private val EXTRACT_TIME_REGEX = "([0-9]{2})([0-9]{2})?([0-9]{2})?".toRegex()

/**
 * Classifies the character at position [i] of the receiver and returns the FSA state to
 * continue with, updating [map] as a side effect.
 *
 * | character at [i]                          | effect on [map]                           | result |
 * |-------------------------------------------|-------------------------------------------|--------|
 * | full stop                                 | appended (typed TY_AMT if `i == 0` in currency context) | 10 |
 * | '*', 'X', 'x'                             | typed TY_ACC, 'X' appended                | 11     |
 * | comma                                     | none                                      | 12     |
 * | '%' or a space directly followed by '%'   | typed TY_PCT                              | -1     |
 * | '+'                                       | typed TY_STR (skipped in currency context)| 36, or -1 in currency context |
 * | "FSA_AMT" token at [i], `i > 0`           | typed TY_AMT, zero suffix appended, index set | 38 |
 * | "FSA_TIMES" token at [i], `i > 0`         | typed TY_TME, time extracted, index set   | 38     |
 * | anything else                             | none                                      | -1     |
 *
 * "Currency context" means `config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR`.
 *
 * The trie lookups are only performed when none of the single-character cases matched.
 *
 * @param config parser configuration.
 *   NOTE(review): its type arguments were missing from the extracted text;
 *   `<String, String>` is assumed. Confirm against the callers.
 * @throws IndexOutOfBoundsException if [i] is not a valid index of the receiver.
 */
internal fun String.accAmtNumPct(
    rootTrie: RootTrie,
    i: Int,
    map: FsaContextMap,
    config: Map<String, String>
): Int {
    val c = this[i]
    val code = c.toInt()
    val inCurrencyContext = {
        config.containsKey(YUGA_SOURCE_CONTEXT) && config[YUGA_SOURCE_CONTEXT] == YUGA_SC_CURR
    }

    return when {
        code == CH_FSTP -> { // dot
            if (i == 0 && inCurrencyContext()) {
                map.setType(TY_AMT, TY_AMT)
            }
            map.append(c)
            10
        }
        c == '*' || c == 'X' || c == 'x' -> {
            map.setType(TY_ACC, TY_ACC)
            map.append('X')
            11
        }
        code == CH_COMA -> 12
        code == CH_PCT || (code == CH_SPACE && i + 1 < length && this[i + 1].toInt() == CH_PCT) -> {
            map.setType(TY_PCT, TY_PCT)
            -1
        }
        code == CH_PLUS -> {
            if (inCurrencyContext()) {
                -1
            } else {
                map.setType(TY_STR, TY_STR)
                36
            }
        }
        i > 0 -> classifySuffix(rootTrie, i, map)
        else -> -1
    }
}

/**
 * Handles the amount-multiplier ("FSA_AMT") and time-unit ("FSA_TIMES") cases of
 * [accAmtNumPct] for the text starting at [i]; returns 38 on a match and -1 otherwise.
 */
private fun String.classifySuffix(rootTrie: RootTrie, i: Int, map: FsaContextMap): Int {
    val rest = substring(i)

    val amount = Util.checkTypes(rootTrie, "FSA_AMT", rest)
    if (amount != null) {
        // NOTE(review): this stores the offset relative to `rest`, whereas the time branch
        // below stores `i + offset`. Confirm which one the consumer of `map.index` expects.
        map.index = amount.first
        map.setType(TY_AMT, TY_AMT)
        map.append(amount.second.getAmt())
        return 38
    }

    val unit = Util.checkTypes(rootTrie, "FSA_TIMES", rest) ?: return -1
    map.index = i + unit.first
    map.setType(TY_TME, null)
    val digits = substring(0, i)
    // A minutes-only value is left-padded so the regex reads it as "00" hours.
    val padded = if (unit.second == "mins") "00$digits" else digits
    padded.extractTime(map.getValMap())
    return 38
}

/** Zero suffix for an amount multiplier: "lakh"/"lac" -> "00000", "k" -> "000", else "". */
internal fun String.getAmt(): String = when (this) {
    "lakh", "lac" -> "00000"
    "k" -> "000"
    else -> ""
}

/**
 * Finds the first run of digits matching [EXTRACT_TIME_REGEX] in the receiver and stores
 * it in [valMap] as `"HH:MM"` under the key `"time"` (or `"<prefix>_time"` when a prefix
 * is given). Minutes default to "00" when the second digit pair is absent. Nothing is
 * stored when the receiver contains no two consecutive digits.
 *
 * @param valMap destination map.
 *   NOTE(review): its type arguments were missing from the extracted text;
 *   `<String, String>` is assumed.
 * @param prefix optional key prefix; only the first element is used.
 */
internal fun String.extractTime(valMap: MutableMap<String, String>, vararg prefix: String) {
    val pre = if (prefix.isNotEmpty()) prefix[0] + "_" else ""
    val match = EXTRACT_TIME_REGEX.find(this) ?: return
    val hours = match.groupValues[1]
    val minutes = match.groups[2]?.value ?: "00"
    valMap[pre + "time"] = "$hours:$minutes"
}

/**
 * Skips full stops starting at [index] and inspects the first other character: returns its
 * position when it is '*', 'X', 'x' or a digit, and -1 otherwise (including when only full
 * stops remain).
 */
internal fun String.lookAheadForInstr(index: Int): Int {
    val pos = (index until length).firstOrNull { this[it].toInt() != CH_FSTP } ?: return -1
    val c = this[pos]
    return if (c == '*' || c == 'X' || c == 'x' || Util.isNumber(c)) pos else -1
}
\ No newline at end of file diff --git a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/Util.kt b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/Util.kt index 0800639..021e36a 100644 --- a/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/Util.kt +++ b/src/commonMain/kotlin/com/twelfthmile/kyuga/utils/Util.kt @@ -1,7 +1,5 @@ package com.twelfthmile.kyuga.utils -import com.twelfthmile.kyuga.types.GenTrie -import com.twelfthmile.kyuga.types.Pair import com.twelfthmile.kyuga.types.RootTrie object Util { @@ -37,18 +35,18 @@ object Util { fun checkTypes(root: RootTrie, type: String, word: String): Pair? { var i = 0 - var t: GenTrie? 
= root.next[type] ?: return null + var t = root.next[type] ?: return null while (i < word.length) { val ch = word[i] - if (t != null && t.leaf && !t.next.containsKey(ch) && isTypeEnd(ch)) - return t.token?.let { tkn -> Pair(i - 1, tkn) } - if (t != null && t.child && t.next.containsKey(ch)) { - t = t.next[ch] + if (t.leaf && !t.next.containsKey(ch) && isTypeEnd(ch)) + return t.token?.let { tkn -> Pair(i - 1, tkn) } + if (t.child && t.next.containsKey(ch)) { + t = t.next[ch] ?: throw IllegalStateException("If check done, cannot be null") } else break i++ } - return if (t != null && t.leaf && i == word.length) t.token?.let { tkn -> Pair(i - 1, tkn) } else null + return if (t.leaf && i == word.length) t.token?.let { tkn -> Pair(i - 1, tkn) } else null } private fun isTypeEnd(ch: Char): Boolean { diff --git a/src/commonTest/kotlin/YugaTest.kt b/src/commonTest/kotlin/YugaTest.kt index ecf5c0d..52ceaf5 100644 --- a/src/commonTest/kotlin/YugaTest.kt +++ b/src/commonTest/kotlin/YugaTest.kt @@ -17,7 +17,7 @@ class YugaTest { fun `on tokenize - valid sms - should tokenize`() { val validSms = "INR 7,980.00 Dr to A/c No XX2471 towards SI HDFC177126215 BSES Rajdhani -02/10/17 Val 03-OCT-17. Clr Bal INR 8,822.69." val candidateTokens = validSms.split(" ").map { it.trim() } - val tokens = Kyuga.tokenise(candidateTokens) + val tokens = Kyuga.tokenize(candidateTokens) assertEquals("[INR, AMT, Dr, to, A/c, No, INSTRNO, towards, SI, IDVAL, BSES, Rajdhani, AMT, Val, DATE, Clr, Bal, INR, AMT]", tokens.toString()) } @@ -26,7 +26,7 @@ class YugaTest { fun `on tokenize - valid offer- should tokenize`() { val sms = "55865 is your One Time Password. This OTP is valid for 10 minutes only. OTP generated on: 09-04-2017 22:55:4" val candidateTokens = sms.split(" ").map { it.trim() } - val tokens = Kyuga.tokenise(candidateTokens) + val tokens = Kyuga.tokenize(candidateTokens) println(tokens) }