Skip to content

Commit

Permalink
Version 4 0 0 validation (#242)
Browse files Browse the repository at this point in the history
* Version set to 4.0.0

* cleaned up a redundant try block

* Added Chat and ASR messages, removed Metadata abstract type

* Removed unneeded base class

* Moved asr msg.subtype to ‘asr:transcription’

* Added factory companion objects for Trial, ASR and Chat messages

* Added BusMessage case class, fixed comments

* Added factory companion objects for DialogAgent and VersionInfo messages

* Added some printf output

* Moved the BusMessage class to the Common messages file

* reading JSON no longer throws an exception if it can't parse the input

* IDC Worker shows state after finishing a job

* Moved to factory model for message construction

* Fixed comments

* Fixed comments, added variable types in declarations

* Removed Metadata class in favor of Chat, ASR, and Trial

* Removed Metadata class in favor of Chat, ASR, and Trial

* Removed null default values

* Comments added

* Added keyboard interrupt

* Fixed comment

* Added file for MQTT bus message

* fixed comments

* Added base working copy, fixed comments

* Cleaned up comments

* TDAC reset no longer takes an argument

* Removed unneeded state in job queue processing

* edited comment

* Added a method to build DialogAgent messages from serialization

* Added Tdac retur message

* Reverted the TdacClient continue to use JSON

* Fixed TDAC comments, replaced server case class

* Dialog Agents wqno longer have to extend the base class

* TDAC Client will now shut down application if server cannot be reset

* Fixed an inheritance issue

* Added .metadata and .vtt file handlers

* Metadata file working

* fixed a comment

* removed an experimental agent class

* The IdcWorker can now be used with any DialogAgent type

* Deleted a comment

* Renamed a file

* Trying an experiment

* IdcWorker jobs now run sequentially

* Removed two experimental classes

* TDAC and IDCWorker args now enabled on file agent
  • Loading branch information
jastier authored Jan 10, 2022
1 parent e279d6c commit 0fcb617
Show file tree
Hide file tree
Showing 26 changed files with 972 additions and 1,041 deletions.
3 changes: 3 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ organization := "org.clulab"

scalaVersion := "2.12.4"

// uncomment to allow graceful keyboard interrupt
// fork in run := true

javacOptions ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint")

initialize := {
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Asr {
message_type = "observation"
}
msg {
sub_type = "asr"
sub_type = "asr:transcription"
}
}

Expand Down
202 changes: 19 additions & 183 deletions src/main/scala/org/clulab/asist/agents/DialogAgent.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package org.clulab.asist.agents
import ai.lum.common.ConfigFactory
import com.typesafe.config.Config
import com.typesafe.scalalogging.LazyLogging
import java.time.Clock
import org.clulab.asist.extraction.TomcatRuleEngine
import org.clulab.asist.messages._
import org.clulab.odin.Mention
Expand All @@ -26,40 +25,19 @@ class DialogAgent (

val config: Config = ConfigFactory.load()

// A reference with the unchanging config fields populated
val templateMessage: DialogAgentMessage = DialogAgentMessage(config)

// Message Bus topics
// Message Bus communication
val topicSubChat = config.getString("Chat.topic")
val topicSubAsr = config.getString("Asr.topic")
val topicSubTrial = config.getString("Trial.topic")

val topicPubDialogAgent = config.getString("DialogAgent.topic")
val topicPubVersionInfo = config.getString("VersionInfo.topic")
val topicPubHeartbeat = config.getString("Heartbeat.topic")

val subscriptions = List(
topicSubChat,
topicSubAsr,
topicSubTrial
)

val publications = List(
topicPubDialogAgent,
topicPubVersionInfo,
topicPubHeartbeat
)

// Create the engine and run it to get lazy init out of the way
// Run the rule engine to get its lazy init out of the way
logger.info("Initializing Extractor (this may take a few seconds) ...")
engine.extractFrom("green victim")
logger.info("Extractor initialized.")

/** Translate a structure to single-line JSON text
* @param a The structure to be translated
*/
// def writeJson[A <: AnyRef](a: A)(implicit formats: Formats): String = write(a)

/**
* Extract Odin mentions from text.
* @param text String text to extract from, can be multiple sentences.
Expand All @@ -71,79 +49,39 @@ class DialogAgent (
.sortBy(m => (m.sentence, m.label))
}

/**
* Extract Odin mentions from text.
* @param doc Document to extract from, can be multiple sentences.
* @return sequence of Odin Mentions
*/
def extractMentions(doc: Document): Seq[Mention] = {
engine
.extractFrom(doc)
.sortBy(m => (m.sentence, m.label))
}

/** Create the data component of the DialogAgentMessage structure
* @param participant_id human subject who created the text
* @param asr_msg_id from the Automated Speech Recognition system
* @param source_type Source of message data, either message_bus or a file
* @param source_name topic or filename
* @param text The text to be analyzed by the pipeline
*/
def dialogAgentMessageData(
participant_id: String,
asr_msg_id: String,
source_type: String,
source_name: String,
text: String
): DialogAgentMessageData =
DialogAgentMessageData(
participant_id,
asr_msg_id,
text,
dialog_act_label = null, // ...
DialogAgentMessageUtteranceSource(
source_type,
source_name
),
getExtractions(text)
)

/** Create the data component of the DialogAgentMessage structure
* @param participant_id human subject who created the text
* @param asr_msg_id from the Automated Speech Recognition system
* @param source_type Source of message data, either message_bus or a file
* @param source_name topic or filename
* @param text The text to be analyzed by the pipeline
* @param extractions extractions obtained from the text
*/
def dialogAgentMessageData(
participant_id: String,
asr_msg_id: String,
source_type: String,
source_name: String,
text: String,
extractions: Seq[DialogAgentMessageUtteranceExtraction]
): DialogAgentMessageData =
DialogAgentMessageData(
participant_id,
asr_msg_id,
text,
dialog_act_label = null, // ...
DialogAgentMessageUtteranceSource(
source_type,
source_name
),
extractions
)

/** Return an array of all extractions found in the input text
* @param text Text to analyze
*/
def getExtractions(text: String): Seq[DialogAgentMessageUtteranceExtraction] =
def getExtractions(
text: String
): Seq[DialogAgentMessageUtteranceExtraction] =
extractMentions(text).map(getExtraction)

def getExtractions(mentions: Seq[Mention]): Seq[DialogAgentMessageUtteranceExtraction] =
/** Map all of the mentions into extractions
* @param mentions To be mapped into extractions
*/
def getExtractions(
mentions: Seq[Mention]
): Seq[DialogAgentMessageUtteranceExtraction] =
mentions.map(getExtraction)

/** Create a DialogAgent extraction from Extractor data
* @param mention Contains text to analyze
*/
def getExtraction(mention: Mention): DialogAgentMessageUtteranceExtraction = {
def getExtraction(
mention: Mention
): DialogAgentMessageUtteranceExtraction = {
val originalArgs = mention.arguments.toArray
val extractionArguments = for {
(role, ms) <- originalArgs
Expand All @@ -159,106 +97,4 @@ class DialogAgent (
mention.foundBy,
)
}

/** create a DialogAgentMessage with metadata
* @param source_type Source of message data, either message_bus or a file
* @param source_name topic or filename
* @param topic Originating process for message
* @param metadata Experiment data
*/
def getDialogAgentMessage(
source_type: String,
source_name: String,
topic: String,
metadata: Metadata
): DialogAgentMessage = {
val timestamp = Clock.systemUTC.instant.toString
val participant_id = topic match {
case `topicSubChat` => (metadata.data.sender)
case `topicSubAsr` => (metadata.data.participant_id)
case _ => null
}
DialogAgentMessage(
templateMessage.header.copy(
timestamp = timestamp,
version = metadata.header.version
),
templateMessage.msg.copy(
experiment_id = metadata.msg.experiment_id,
trial_id = metadata.msg.trial_id,
timestamp = timestamp,
replay_root_id = metadata.msg.replay_root_id,
replay_id = metadata.msg.replay_id
),
dialogAgentMessageData(
participant_id,
metadata.data.id,
source_type,
source_name,
metadata.data.text
)
)
}

/** create a DialogAgentMessage without metadata
* @param source_type Source of message data, either keyboard or a file
* @param source_name topic or filename
* @param participant_id The individual who has spoken
* @param text Spoken text to be analyzed
*/
def getDialogAgentMessage(
source_type: String,
source_name: String,
participant_id: String,
text: String
): DialogAgentMessage = {
val timestamp = Clock.systemUTC.instant.toString
DialogAgentMessage(
templateMessage.header.copy(
timestamp = timestamp,
),
templateMessage.msg.copy(
timestamp = timestamp,
),
dialogAgentMessageData(
participant_id,
null, // ...
source_type,
source_name,
text
)
)
}

/* Read a DialogAgentMessageData struct from JSON and replace the extractions
* @param json A JSON representation of a DialogAgentMessageData struct
* @return The JSON-defined struct or a defaut if the parsing fails.
*/
def readDialogAgentMessageData(json: String): DialogAgentMessageData = try {
val data = JsonUtils.readJson[DialogAgentMessageData](json)
data.copy(extractions = getExtractions(data.text))
} catch {
case NonFatal(t) => {
logger.error(s"readDialogAgentMessageData could not parse ${json}")
logger.error(t.toString)
new DialogAgentMessageData(
utterance_source = new DialogAgentMessageUtteranceSource
)
}
}

/** Parse a string into a JValue
* @param line Hopefully JSON but could be anything the user tries to run
* @return A JSON value parsed from the line or None
*/
/*
def parseJValue(line: String): Option[JValue] = try {
Some(parse(s""" ${line} """))
} catch {
case NonFatal(t) =>
logger.error(s"parseJValue: Could not parse: ${line}\n")
logger.error(t.toString)
None
}
*/
}
Loading

0 comments on commit 0fcb617

Please sign in to comment.