BrendanWalsh · BrendanWalsh · Oct 24, 2022 · Dec 16, 2022 · Mar 18, 2023 · Mar 27, 2023
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -0,0 +1,19 @@
+# Starter pipeline
+# Start with a minimal pipeline that you can customize to build and deploy your code.
+# Add steps that build, run tests, deploy, and more:
+# https://aka.ms/yaml
+
+trigger:
+- master
+
+pool:
+  vmImage: ubuntu-latest
+
+steps:
+- script: echo Hello, world!
+  displayName: 'Run a one-line script'
+
+- script: |
+    echo Add other tasks to build, test, and deploy your project.
+    echo See https://aka.ms/yaml
+  displayName: 'Run a multi-line script'
diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceBase.scala
@@ -225,6 +225,10 @@ trait HasInternalJsonOutputParser {
   }
 }
 
+trait HasInternalStringOutputParser {
+  protected def getInternalOutputParser(schema: StructType): HTTPOutputParser = new StringOutputParser()
+}
+
 trait HasUrlPath {
   def urlPath: String
 }

diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala
@@ -444,7 +444,7 @@ object GenerateThumbnails extends ComplexParamsReadable[GenerateThumbnails] with
 class GenerateThumbnails(override val uid: String)
   extends CognitiveServicesBase(uid) with HasImageInput
     with HasWidth with HasHeight with HasSmartCropping
-    with HasInternalJsonOutputParser with HasCognitiveServiceInput with HasSetLocation with BasicLogging
+    with HasCognitiveServiceInput with HasSetLocation with BasicLogging
     with HasSetLinkedService {
   logClass()
 
@@ -454,8 +454,6 @@ class GenerateThumbnails(override val uid: String)
     new CustomOutputParser().setUDF({ r: HTTPResponseData => r.entity.map(_.content).orNull })
   }
 
-  override def responseDataType: DataType = BinaryType
-
   def urlPath: String = "/vision/v2.0/generateThumbnail"
 }
 

diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechSchemas.scala
@@ -3,8 +3,13 @@
 
 package com.microsoft.azure.synapse.ml.cognitive
 
+import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol
 import com.microsoft.azure.synapse.ml.core.schema.SparkBindings
+import com.microsoft.azure.synapse.ml.io.http.HasErrorCol
+import com.microsoft.azure.synapse.ml.param.ServiceParam
 import com.microsoft.cognitiveservices.speech.SpeechSynthesisCancellationDetails
+import org.apache.spark.ml.param.{Param, Params}
+import spray.json.DefaultJsonProtocol.StringJsonFormat
 import spray.json.{DefaultJsonProtocol, RootJsonFormat}
 
 case class DetailedSpeechResponse(Confidence: Double,
@@ -57,7 +62,6 @@ object SpeechFormat extends DefaultJsonProtocol {
     jsonFormat9(TranscriptionResponse.apply)
   implicit val TranscriptionParticipantFormat: RootJsonFormat[TranscriptionParticipant] =
     jsonFormat3(TranscriptionParticipant.apply)
-
 }
 
 object SpeechSynthesisError extends SparkBindings[SpeechSynthesisError] {
@@ -67,3 +71,67 @@ object SpeechSynthesisError extends SparkBindings[SpeechSynthesisError] {
 }
 
 case class SpeechSynthesisError(errorCode: String, errorDetails: String, errorReason: String)
+
+trait HasLocaleCol extends HasServiceParams {
+  val locale = new ServiceParam[String](this,
+    "locale",
+    s"The locale of the input text",
+    isRequired = true)
+
+  def setLocale(v: String): this.type = setScalarParam(locale, v)
+  def setLocaleCol(v: String): this.type = setVectorParam(locale, v)
+}
+
+trait HasTextCol extends HasServiceParams {
+  val text = new ServiceParam[String](this,
+    "text",
+    s"The text to synthesize",
+    isRequired = true)
+
+  def setText(v: String): this.type = setScalarParam(text, v)
+  def setTextCol(v: String): this.type = setVectorParam(text, v)
+}
+
+trait HasVoiceCol extends HasServiceParams {
+  val voice = new ServiceParam[String](this,
+    "voice",
+    s"The name of the voice used for synthesis",
+    isRequired = true)
+
+  def setVoiceName(v: String): this.type = setScalarParam(voice, v)
+  def setVoiceNameCol(v: String): this.type = setVectorParam(voice, v)
+}
+
+trait HasSSMLOutputCol extends Params {
+  val ssmlOutputCol = new Param[String](this, "ssmlCol", "The name of the SSML column")
+
+  def setSSMLOutputCol(value: String): this.type = set(ssmlOutputCol, value)
+
+  def getSSMLOutputCol: String = $(ssmlOutputCol)
+}
+
+trait HasSSMLGeneratorParams extends HasServiceParams
+  with HasLocaleCol with HasTextCol with HasVoiceCol
+  with HasSSMLOutputCol with HasOutputCol with HasErrorCol
+
+case class TextToSpeechSSMLError(errorCode: String, errorDetails: String)
+  object TextToSpeechSSMLError extends SparkBindings[TextToSpeechSSMLError]
+
+case class SSMLConversation(Begin: Int,
+                        End: Int,
+                        Content: String,
+                        Role: String,
+                        Style: String)
+object SSMLConversation extends SparkBindings[SSMLConversation]
+
+case class TextToSpeechSSMLResponse(IsValid: Boolean, Conversations: Seq[SSMLConversation])
+object TextToSpeechSSMLResponse extends SparkBindings[TextToSpeechSSMLResponse]
+
+object TextToSpeechSSMLFormat extends DefaultJsonProtocol {
+  implicit val ConversationFormat: RootJsonFormat[SSMLConversation] =
+    jsonFormat(SSMLConversation.apply, "Begin", "End", "Content", "Role", "Style")
+  implicit val TextToSpeechSSMLResponseFormat: RootJsonFormat[TextToSpeechSSMLResponse] =
+    jsonFormat(TextToSpeechSSMLResponse.apply, "IsValid", "Conversations")
+  implicit val TextToSpeechSSMLErrorFormat: RootJsonFormat[TextToSpeechSSMLError] =
+    jsonFormat(TextToSpeechSSMLError.apply, "ErrorCode", "ErrorDetails")
+}
diff --git a/...e/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextToSpeechSSMLGenerator.scala b/...e/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextToSpeechSSMLGenerator.scala
@@ -0,0 +1,113 @@
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.azure.synapse.ml.cognitive
+
+import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLFormat.TextToSpeechSSMLResponseFormat
+import com.microsoft.azure.synapse.ml.logging.BasicLogging
+import org.apache.http.client.methods.HttpRequestBase
+import org.apache.http.entity.{AbstractHttpEntity, StringEntity}
+import org.apache.spark.ml.{ComplexParamsReadable, NamespaceInjections, PipelineModel, Transformer}
+import org.apache.spark.ml.param.ParamMap
+import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.types.{StringType, StructType}
+import org.apache.spark.sql.{DataFrame, Dataset, Row}
+import spray.json._
+
+object TextToSpeechSSMLGenerator extends ComplexParamsReadable[TextToSpeechSSMLGenerator] with Serializable
+class TextToSpeechSSMLGenerator(override val uid: String) extends CognitiveServicesBase(uid)
+  with HasSSMLGeneratorParams with HasCognitiveServiceInput
+  with HasInternalStringOutputParser
+  with BasicLogging {
+  logClass()
+
+  def this() = this(Identifiable.randomUID(classOf[TextToSpeechSSMLGenerator].getSimpleName))
+
+  setDefault(errorCol -> (uid + "_errors"))
+  setDefault(locale -> Left("en-US"))
+  setDefault(voice -> Left("en-US-SaraNeural"))
+
+  def urlPath: String = "cognitiveservices/v1"
+
+  protected val additionalHeaders: Map[String, String] = Map[String, String](
+    ("X-Microsoft-OutputFormat", "textanalytics-json"),
+    ("Content-Type", "application/ssml+xml")
+  )
+
+  override def inputFunc(schema: StructType): Row => Option[HttpRequestBase] = super.inputFunc(schema).andThen(r => {
+    r.map(req => {
+      additionalHeaders.foreach(header => req.setHeader(header._1, header._2))
+      req
+    })
+  })
+
+  override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { row =>
+    val localeValue = getValue(row, locale)
+    val zhCNVoiceName = "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaomoNeural)"
+    val enUSVoiceName = "Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)"
+    val voiceName = if (localeValue == "zh-CN") zhCNVoiceName else enUSVoiceName
+    val textValue = getValue(row, text)
+    val body: String =
+      s"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'" +
+        s" xmlns:mstts='https://www.w3.org/2001/mstts'" +
+        s" xml:lang='${localeValue}'><voice xml:lang='${localeValue}' xml:gender='Female' name='${voiceName}'>" +
+        s"<mstts:task name ='RoleStyle'/>${textValue}</voice></speak>"
+    Some(new StringEntity(body))
+  }
+
+  def formatSSML(row: Row, response: TextToSpeechSSMLResponse): String = {
+    val ssmlFormat: String = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
+      "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='%s'>".format(getValue(row, locale)) +
+      "%s</speak>"
+    val voiceFormat = "<voice name='%s'>".format(getValue(row, voice)) + "%s</voice>"
+    val expressAsFormat = "<mstts:express-as role='%s' style='%s'>%s</mstts:express-as>"
+    val builder = new StringBuilder()
+    val fullText = "%s".format(getValue(row, text))
+    var lastEnd = 0
+    response.Conversations.foreach(c => {
+      val content = c.Content
+      val role = c.Role.toLowerCase()
+      val style = c.Style.toLowerCase()
+      val begin = c.Begin
+      val end = c.End
+
+      val ssmlTurnRoleStyleStr = expressAsFormat.format(role, style, content)
+      val preStr = fullText.substring(lastEnd, begin - lastEnd)
+
+      if (preStr.length > 0) {
+        builder.append(preStr)
+      }
+      builder.append(ssmlTurnRoleStyleStr)
+      lastEnd = end
+    })
+
+    val endStr = fullText.substring(lastEnd)
+    if (endStr.length > 0) {
+      builder.append(endStr)
+    }
+    val outSsmlStr = ssmlFormat.format(voiceFormat.format(builder.toString())) + "\n"
+    outSsmlStr
+  }
+
+  val postprocessingTransformer: Transformer = new Transformer {
+    def transform(dataset: Dataset[_]): DataFrame = dataset.toDF().map { row =>
+      val response = row.getAs[String](getOutputCol).parseJson.convertTo[TextToSpeechSSMLResponse]
+      val result = formatSSML(row, response)
+      Row.fromSeq(row.toSeq ++ Seq(result))
+    }(RowEncoder(transformSchema(dataset.schema)))
+
+    override val uid: String = Identifiable.randomUID("TTSSSMLInternalPostProcessor")
+
+    override def copy(extra: ParamMap): Transformer = defaultCopy(extra)
+
+    override def transformSchema(schema: StructType): StructType = schema.add(getSSMLOutputCol, StringType)
+  }
+
+  override def getInternalTransformer(schema: StructType): PipelineModel = {
+    NamespaceInjections.pipelineModel(stages=Array[Transformer](
+      super.getInternalTransformer(schema),
+      postprocessingTransformer
+    ))
+  }
+}
diff --git a/.../src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala b/.../src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala
@@ -3,7 +3,6 @@
 
 package com.microsoft.azure.synapse.ml.cognitive.split1
 
-import com.microsoft.azure.synapse.ml.Secrets
 import com.microsoft.azure.synapse.ml.cognitive._
 import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._
 import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase}
@@ -14,10 +13,6 @@ import org.apache.spark.sql.functions.{col, typedLit}
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.scalactic.Equality
 
-trait CognitiveKey {
-  lazy val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey)
-}
-
 trait OCRUtils extends TestBase {
 
   import spark.implicits._

diff --git a/...cala/com/microsoft/azure/synapse/ml/cognitive/split2/TextToSpeechSSMLGeneratorSuite.scala b/...cala/com/microsoft/azure/synapse/ml/cognitive/split2/TextToSpeechSSMLGeneratorSuite.scala
@@ -0,0 +1,60 @@
+package com.microsoft.azure.synapse.ml.cognitive.split2
+
+import com.microsoft.azure.synapse.ml.cognitive._
+import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLGenerator
+import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import org.apache.spark.ml.util.MLReadable
+import org.apache.spark.sql.DataFrame
+import spray.json._
+import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLFormat.TextToSpeechSSMLResponseFormat
+
+class TextToSpeechSSMLGeneratorSuite extends TransformerFuzzing[TextToSpeechSSMLGenerator] with CognitiveKey  {
+
+  import spark.implicits._
+
+  def ssmlGenerator: TextToSpeechSSMLGenerator = new TextToSpeechSSMLGenerator()
+    .setUrl("https://eastus.tts.speech.microsoft.com/cognitiveservices/v1")
+    .setSubscriptionKey(cognitiveKey)
+    .setTextCol("textColName")
+    .setOutputCol("outputColName")
+    .setSSMLOutputCol("SSMLColName")
+    .setErrorCol("errorColName")
+    .setLocale("en-US")
+    .setVoiceName("JennyNeural")
+
+  val testData: Map[String, (Boolean, String)] = Map[String, (Boolean, String)](
+    "\"I'm shouting excitedly!\" she shouted excitedly." ->
+      (true, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
+        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
+        "<mstts:express-as role='female' style='cheerful'>\"I'm shouting excitedly!\"</mstts:express-as> she shouted " +
+        "excitedly.</voice></speak>\n"),
+    "This text has no quotes in it, so isValid should be false" ->
+      (false, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
+        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
+        "This text has no quotes in it, so isValid should be false</voice></speak>\n"),
+    "\"This is an example of a sentence with unmatched quotes,\" she said.\"" ->
+      (false, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
+        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
+        "<mstts:express-as role='female' style='calm'>\"This is an example of a sentence with unmatched quotes,\"" +
+        "</mstts:express-as> she said.\"</voice></speak>\n")
+  )
+
+  lazy val df: DataFrame = testData.map(e => e._1).toSeq.toDF("textColName")
+
+  test("basic") {
+    testData.map(e => {
+      val transform = ssmlGenerator.transform(Seq(e._1).toDF("textColName"))
+      transform.show(truncate = false)
+      val result = transform.collect()
+      result.map(row => row.getString(2)).foreach(out =>
+        assert(out.parseJson.convertTo[TextToSpeechSSMLResponse].IsValid == e._2._1))
+      result.map(row => row.getString(3)).foreach(out =>
+        assert(out.trim == e._2._2.trim))
+    })
+  }
+
+  override def testObjects(): Seq[TestObject[TextToSpeechSSMLGenerator]] =
+    Seq(new TestObject(ssmlGenerator, df))
+
+  override def reader: MLReadable[_] = TextToSpeechSSMLGenerator
+}
diff --git a/...ve/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/TextToSpeechSuite.scala b/...ve/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/TextToSpeechSuite.scala
@@ -3,8 +3,8 @@
 
 package com.microsoft.azure.synapse.ml.cognitive.split2
 
+import com.microsoft.azure.synapse.ml.cognitive._
 import com.microsoft.azure.synapse.ml.cognitive.TextToSpeech
-import com.microsoft.azure.synapse.ml.cognitive.split1.CognitiveKey
 import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
 import org.apache.commons.io.FileUtils
 import org.apache.spark.ml.util.MLReadable

diff --git a/...rc/test/scala/com/microsoft/azure/synapse/ml/cognitive/test/CognitiveServicesCommon.scala b/...rc/test/scala/com/microsoft/azure/synapse/ml/cognitive/test/CognitiveServicesCommon.scala
@@ -0,0 +1,7 @@
+package com.microsoft.azure.synapse.ml.cognitive
+
+import com.microsoft.azure.synapse.ml.Secrets
+
+trait CognitiveKey {
+  lazy val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey)
+}
diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala
@@ -6,13 +6,14 @@ package com.microsoft.azure.synapse.ml.param
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.ml.param.{Param, ParamPair, Params}
 import spray.json.{DefaultJsonProtocol, JsValue, JsonFormat, _}
-import org.json4s.DefaultFormats
 
 import scala.collection.JavaConverters._
 
 object AnyJsonFormat extends DefaultJsonProtocol {
 
-  implicit def anyFormat: JsonFormat[Any] =
+  implicit def anyFormat: JsonFormat[Any] = {
+    def throwFailure(any: Any) = throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}")
+
     new JsonFormat[Any] {
       def write(any: Any): JsValue = any match {
         case v: Int => v.toJson
@@ -21,8 +22,15 @@ object AnyJsonFormat extends DefaultJsonProtocol {
         case v: Boolean => v.toJson
         case v: Integer => v.toLong.toJson
         case v: Seq[_] => seqFormat[Any].write(v)
-        case v: Map[String, _] => mapFormat[String, Any].write(v)
-        case _ => throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}")
+        case v: Map[_, _] => {
+          try {
+            mapFormat[String, Any].write(v.asInstanceOf[Map[String, _]])
+          }
+          catch {
+            case _: SerializationException => throwFailure(any)
+          }
+        }
+        case _ => throwFailure(any)
       }
 
       def read(value: JsValue): Any = value match {

diff --git a/test.txt b/test.txt
@@ -0,0 +1 @@
+New file content3