Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Pull Request #3

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml

# Branches whose pushes start this pipeline.
trigger:
- master

# Agent image the job runs on.
pool:
  vmImage: ubuntu-latest

steps:
# Single inline command.
- script: echo Hello, world!
  displayName: 'Run a one-line script'

# Block scalar: each line below runs as part of the same script step.
- script: |
    echo Add other tasks to build, test, and deploy your project.
    echo See https://aka.ms/yaml
  displayName: 'Run a multi-line script'
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ trait HasInternalJsonOutputParser {
}
}

/** Mixin whose internal output parser is always a [[StringOutputParser]]. */
trait HasInternalStringOutputParser {

  /** Returns a fresh [[StringOutputParser]]; the `schema` argument is ignored.
    *
    * @param schema schema handed in by the caller (unused here)
    * @return a new string output parser
    */
  protected def getInternalOutputParser(schema: StructType): HTTPOutputParser = {
    new StringOutputParser()
  }
}

/** Contract for components that expose a URL path as a string. */
trait HasUrlPath {
// Path value supplied by the implementing class.
def urlPath: String
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ object GenerateThumbnails extends ComplexParamsReadable[GenerateThumbnails] with
class GenerateThumbnails(override val uid: String)
extends CognitiveServicesBase(uid) with HasImageInput
with HasWidth with HasHeight with HasSmartCropping
with HasInternalJsonOutputParser with HasCognitiveServiceInput with HasSetLocation with BasicLogging
with HasCognitiveServiceInput with HasSetLocation with BasicLogging
with HasSetLinkedService {
logClass()

Expand All @@ -454,8 +454,6 @@ class GenerateThumbnails(override val uid: String)
new CustomOutputParser().setUDF({ r: HTTPResponseData => r.entity.map(_.content).orNull })
}

override def responseDataType: DataType = BinaryType

def urlPath: String = "/vision/v2.0/generateThumbnail"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@

package com.microsoft.azure.synapse.ml.cognitive

import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol
import com.microsoft.azure.synapse.ml.core.schema.SparkBindings
import com.microsoft.azure.synapse.ml.io.http.HasErrorCol
import com.microsoft.azure.synapse.ml.param.ServiceParam
import com.microsoft.cognitiveservices.speech.SpeechSynthesisCancellationDetails
import org.apache.spark.ml.param.{Param, Params}
import spray.json.DefaultJsonProtocol.StringJsonFormat
import spray.json.{DefaultJsonProtocol, RootJsonFormat}

case class DetailedSpeechResponse(Confidence: Double,
Expand Down Expand Up @@ -57,7 +62,6 @@ object SpeechFormat extends DefaultJsonProtocol {
jsonFormat9(TranscriptionResponse.apply)
implicit val TranscriptionParticipantFormat: RootJsonFormat[TranscriptionParticipant] =
jsonFormat3(TranscriptionParticipant.apply)

}

object SpeechSynthesisError extends SparkBindings[SpeechSynthesisError] {
Expand All @@ -67,3 +71,67 @@ object SpeechSynthesisError extends SparkBindings[SpeechSynthesisError] {
}

/** Speech synthesis error record carrying an error code, details and reason, all as strings. */
case class SpeechSynthesisError(errorCode: String, errorDetails: String, errorReason: String)

/** Adds the required `locale` service parameter plus its scalar and column-based setters. */
trait HasLocaleCol extends HasServiceParams {
  val locale = new ServiceParam[String](
    this, "locale", "The locale of the input text", isRequired = true)

  /** Sets `locale` through `setScalarParam`, i.e. from the literal value `v`. */
  def setLocale(v: String): this.type = {
    setScalarParam(locale, v)
  }

  /** Sets `locale` through `setVectorParam`, where `v` names the column to read from. */
  def setLocaleCol(v: String): this.type = {
    setVectorParam(locale, v)
  }
}

/** Adds the required `text` service parameter plus its scalar and column-based setters. */
trait HasTextCol extends HasServiceParams {
  val text = new ServiceParam[String](
    this, "text", "The text to synthesize", isRequired = true)

  /** Sets `text` through `setScalarParam`, i.e. from the literal value `v`. */
  def setText(v: String): this.type = {
    setScalarParam(text, v)
  }

  /** Sets `text` through `setVectorParam`, where `v` names the column to read from. */
  def setTextCol(v: String): this.type = {
    setVectorParam(text, v)
  }
}

/** Adds the required `voice` service parameter plus its scalar and column-based setters. */
trait HasVoiceCol extends HasServiceParams {
  val voice = new ServiceParam[String](
    this, "voice", "The name of the voice used for synthesis", isRequired = true)

  /** Sets `voice` through `setScalarParam`, i.e. from the literal value `v`. */
  def setVoiceName(v: String): this.type = {
    setScalarParam(voice, v)
  }

  /** Sets `voice` through `setVectorParam`, where `v` names the column to read from. */
  def setVoiceNameCol(v: String): this.type = {
    setVectorParam(voice, v)
  }
}

/** Parameter mixin holding the name of the SSML column. */
trait HasSSMLOutputCol extends Params {
  // NOTE(review): the registered param name is "ssmlCol" while the field is `ssmlOutputCol`;
  // the name is a runtime string and is left untouched here.
  val ssmlOutputCol = new Param[String](this, "ssmlCol", "The name of the SSML column")

  /** @return the configured (or default) SSML column name */
  def getSSMLOutputCol: String = getOrDefault(ssmlOutputCol)

  /** Stores `value` as the SSML column name and returns this instance for chaining. */
  def setSSMLOutputCol(value: String): this.type = {
    set(ssmlOutputCol, value)
  }
}

/** Bundles the locale, text, voice, SSML-output, output and error column mixins into one trait. */
trait HasSSMLGeneratorParams extends HasServiceParams
with HasLocaleCol with HasTextCol with HasVoiceCol
with HasSSMLOutputCol with HasOutputCol with HasErrorCol

/** Error payload made of an error code and a details message. */
case class TextToSpeechSSMLError(
  errorCode: String,
  errorDetails: String)

/** Companion mixing in [[SparkBindings]] for [[TextToSpeechSSMLError]]. */
object TextToSpeechSSMLError extends SparkBindings[TextToSpeechSSMLError]

/** One conversation span: begin/end offsets, its content, and the role and style attached to it. */
case class SSMLConversation(Begin: Int, End: Int, Content: String, Role: String, Style: String)

/** Companion mixing in [[SparkBindings]] for [[SSMLConversation]]. */
object SSMLConversation extends SparkBindings[SSMLConversation]

/** Response payload: a validity flag and the list of conversation spans. */
case class TextToSpeechSSMLResponse(
  IsValid: Boolean,
  Conversations: Seq[SSMLConversation])

/** Companion mixing in [[SparkBindings]] for [[TextToSpeechSSMLResponse]]. */
object TextToSpeechSSMLResponse extends SparkBindings[TextToSpeechSSMLResponse]

/** spray-json formats for the SSML response, conversation and error case classes.
  * JSON field names are listed explicitly for each format.
  */
object TextToSpeechSSMLFormat extends DefaultJsonProtocol {
// Declared first: the response format below needs a format for SSMLConversation.
implicit val ConversationFormat: RootJsonFormat[SSMLConversation] =
jsonFormat(SSMLConversation.apply, "Begin", "End", "Content", "Role", "Style")
implicit val TextToSpeechSSMLResponseFormat: RootJsonFormat[TextToSpeechSSMLResponse] =
jsonFormat(TextToSpeechSSMLResponse.apply, "IsValid", "Conversations")
// Note the capitalised JSON keys, which differ from the lower-camel case class field names.
implicit val TextToSpeechSSMLErrorFormat: RootJsonFormat[TextToSpeechSSMLError] =
jsonFormat(TextToSpeechSSMLError.apply, "ErrorCode", "ErrorDetails")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.azure.synapse.ml.cognitive

import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLFormat.TextToSpeechSSMLResponseFormat
import com.microsoft.azure.synapse.ml.logging.BasicLogging
import org.apache.http.client.methods.HttpRequestBase
import org.apache.http.entity.{AbstractHttpEntity, StringEntity}
import org.apache.spark.ml.{ComplexParamsReadable, NamespaceInjections, PipelineModel, Transformer}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.{DataFrame, Dataset, Row}
import spray.json._

object TextToSpeechSSMLGenerator extends ComplexParamsReadable[TextToSpeechSSMLGenerator] with Serializable

/** Sends the input text, wrapped in an SSML `RoleStyle` task request, to the service and then
  * builds an SSML document in which every conversation span returned by the service is wrapped
  * in an `mstts:express-as` element carrying its role and style.
  *
  * The raw service response is kept as a string in the output column; the generated SSML is
  * appended in the column named by `getSSMLOutputCol`.
  *
  * @param uid unique identifier of this transformer
  */
class TextToSpeechSSMLGenerator(override val uid: String) extends CognitiveServicesBase(uid)
  with HasSSMLGeneratorParams with HasCognitiveServiceInput
  with HasInternalStringOutputParser
  with BasicLogging {
  logClass()

  def this() = this(Identifiable.randomUID(classOf[TextToSpeechSSMLGenerator].getSimpleName))

  setDefault(errorCol -> (uid + "_errors"))
  setDefault(locale -> Left("en-US"))
  setDefault(voice -> Left("en-US-SaraNeural"))

  def urlPath: String = "cognitiveservices/v1"

  /** Headers forced onto every outgoing request by [[inputFunc]]. */
  protected val additionalHeaders: Map[String, String] = Map[String, String](
    ("X-Microsoft-OutputFormat", "textanalytics-json"),
    ("Content-Type", "application/ssml+xml")
  )

  /** Decorates the parent request builder so that each produced request carries
    * [[additionalHeaders]]; rows for which the parent yields no request stay `None`.
    */
  override def inputFunc(schema: StructType): Row => Option[HttpRequestBase] =
    super.inputFunc(schema).andThen { maybeRequest =>
      maybeRequest.map { req =>
        additionalHeaders.foreach { case (name, value) => req.setHeader(name, value) }
        req
      }
    }

  /** Builds the SSML request body for a row.
    *
    * The voice used in the request depends only on the locale (zh-CN gets the XiaomoNeural
    * voice, everything else JennyNeural); the `voice` parameter is applied later, in
    * [[formatSSML]].
    */
  override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { row =>
    val localeValue = getValue(row, locale)
    val zhCNVoiceName = "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaomoNeural)"
    val enUSVoiceName = "Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)"
    val voiceName = if (localeValue == "zh-CN") zhCNVoiceName else enUSVoiceName
    val textValue = getValue(row, text)
    val body: String =
      s"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'" +
        s" xmlns:mstts='https://www.w3.org/2001/mstts'" +
        s" xml:lang='${localeValue}'><voice xml:lang='${localeValue}' xml:gender='Female' name='${voiceName}'>" +
        s"<mstts:task name ='RoleStyle'/>${textValue}</voice></speak>"
    // StringEntity(String) encodes as ISO-8859-1, which cannot represent e.g. zh-CN text;
    // encode explicitly as UTF-8 instead.
    Some(new StringEntity(body, java.nio.charset.StandardCharsets.UTF_8))
  }

  /** Produces the final SSML document for a row.
    *
    * Text outside the returned conversation spans is copied verbatim; each span is replaced by
    * its `Content` wrapped in `mstts:express-as` with lower-cased role and style.
    *
    * @param row      row providing the `text`, `locale` and `voice` values
    * @param response parsed service response; its conversations are processed in the given
    *                 order and are expected to be ascending and non-overlapping
    * @return the SSML document followed by a newline
    */
  def formatSSML(row: Row, response: TextToSpeechSSMLResponse): String = {
    val fullText = "%s".format(getValue(row, text))
    val builder = new StringBuilder()

    // Thread the end offset of the previous span through the fold.
    val lastEnd = response.Conversations.foldLeft(0) { (previousEnd, c) =>
      // substring takes (beginIndex, endIndex), so the gap before this span ends at c.Begin.
      builder.append(fullText.substring(previousEnd, c.Begin))
      builder.append(
        s"<mstts:express-as role='${c.Role.toLowerCase()}' style='${c.Style.toLowerCase()}'>" +
          s"${c.Content}</mstts:express-as>")
      c.End
    }
    // Whatever follows the last span (or the whole text when there are no spans).
    builder.append(fullText.substring(lastEnd))

    // Interpolation rather than format strings, so '%' in the locale or voice name is harmless.
    s"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
      s"xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='${getValue(row, locale)}'>" +
      s"<voice name='${getValue(row, voice)}'>${builder.toString()}</voice></speak>" + "\n"
  }

  /** Appends the SSML column, computed from the raw response held in the output column. */
  val postprocessingTransformer: Transformer = new Transformer {
    def transform(dataset: Dataset[_]): DataFrame = dataset.toDF().map { row =>
      // A null output column yields a null SSML value instead of a NullPointerException.
      val ssml = Option(row.getAs[String](getOutputCol))
        .map(raw => formatSSML(row, raw.parseJson.convertTo[TextToSpeechSSMLResponse]))
        .orNull
      Row.fromSeq(row.toSeq :+ ssml)
    }(RowEncoder(transformSchema(dataset.schema)))

    override val uid: String = Identifiable.randomUID("TTSSSMLInternalPostProcessor")

    override def copy(extra: ParamMap): Transformer = defaultCopy(extra)

    override def transformSchema(schema: StructType): StructType = schema.add(getSSMLOutputCol, StringType)
  }

  /** Chains the parent's internal transformer with [[postprocessingTransformer]]. */
  override def getInternalTransformer(schema: StructType): PipelineModel = {
    NamespaceInjections.pipelineModel(stages = Array[Transformer](
      super.getInternalTransformer(schema),
      postprocessingTransformer
    ))
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

package com.microsoft.azure.synapse.ml.cognitive.split1

import com.microsoft.azure.synapse.ml.Secrets
import com.microsoft.azure.synapse.ml.cognitive._
import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._
import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase}
Expand All @@ -14,10 +13,6 @@ import org.apache.spark.sql.functions.{col, typedLit}
import org.apache.spark.sql.{DataFrame, Dataset, Row}
import org.scalactic.Equality

/** Supplies the cognitive services API key to test suites. */
trait CognitiveKey {
  /** Value of the `COGNITIVE_API_KEY` environment variable, or `Secrets.CognitiveApiKey` when unset. */
  lazy val cognitiveKey: String = {
    sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey)
  }
}

trait OCRUtils extends TestBase {

import spark.implicits._
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package com.microsoft.azure.synapse.ml.cognitive.split2

import com.microsoft.azure.synapse.ml.cognitive._
import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLGenerator
import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.DataFrame
import spray.json._
import com.microsoft.azure.synapse.ml.cognitive.TextToSpeechSSMLFormat.TextToSpeechSSMLResponseFormat

/** Tests for [[TextToSpeechSSMLGenerator]]: checks the validity flag of the raw response and the
  * generated SSML for a small set of sample sentences.
  */
class TextToSpeechSSMLGeneratorSuite extends TransformerFuzzing[TextToSpeechSSMLGenerator] with CognitiveKey {

  import spark.implicits._

  /** Fresh generator configured with the column names used throughout this suite. */
  def ssmlGenerator: TextToSpeechSSMLGenerator = new TextToSpeechSSMLGenerator()
    .setUrl("https://eastus.tts.speech.microsoft.com/cognitiveservices/v1")
    .setSubscriptionKey(cognitiveKey)
    .setTextCol("textColName")
    .setOutputCol("outputColName")
    .setSSMLOutputCol("SSMLColName")
    .setErrorCol("errorColName")
    .setLocale("en-US")
    .setVoiceName("JennyNeural")

  /** Input text -> (expected `IsValid` flag, expected SSML output). */
  val testData: Map[String, (Boolean, String)] = Map[String, (Boolean, String)](
    "\"I'm shouting excitedly!\" she shouted excitedly." ->
      (true, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
        "<mstts:express-as role='female' style='cheerful'>\"I'm shouting excitedly!\"</mstts:express-as> she shouted " +
        "excitedly.</voice></speak>\n"),
    "This text has no quotes in it, so isValid should be false" ->
      (false, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
        "This text has no quotes in it, so isValid should be false</voice></speak>\n"),
    "\"This is an example of a sentence with unmatched quotes,\" she said.\"" ->
      (false, "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " +
        "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'><voice name='JennyNeural'>" +
        "<mstts:express-as role='female' style='calm'>\"This is an example of a sentence with unmatched quotes,\"" +
        "</mstts:express-as> she said.\"</voice></speak>\n")
  )

  lazy val df: DataFrame = testData.keys.toSeq.toDF("textColName")

  test("basic") {
    // foreach, not map: the loop exists only for its assertions.
    testData.foreach { case (inputText, (expectedIsValid, expectedSsml)) =>
      val transformed = ssmlGenerator.transform(Seq(inputText).toDF("textColName"))
      transformed.show(truncate = false)
      transformed.collect().foreach { row =>
        // Read by column name so the checks do not depend on column positions.
        val response = row.getAs[String]("outputColName").parseJson.convertTo[TextToSpeechSSMLResponse]
        assert(response.IsValid == expectedIsValid)
        assert(row.getAs[String]("SSMLColName").trim == expectedSsml.trim)
      }
    }
  }

  override def testObjects(): Seq[TestObject[TextToSpeechSSMLGenerator]] =
    Seq(new TestObject(ssmlGenerator, df))

  override def reader: MLReadable[_] = TextToSpeechSSMLGenerator
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

package com.microsoft.azure.synapse.ml.cognitive.split2

import com.microsoft.azure.synapse.ml.cognitive._
import com.microsoft.azure.synapse.ml.cognitive.TextToSpeech
import com.microsoft.azure.synapse.ml.cognitive.split1.CognitiveKey
import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.util.MLReadable
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.microsoft.azure.synapse.ml.cognitive

import com.microsoft.azure.synapse.ml.Secrets

/** Supplies the cognitive services API key to test suites. */
trait CognitiveKey {
  /** Value of the `COGNITIVE_API_KEY` environment variable, or `Secrets.CognitiveApiKey` when unset. */
  lazy val cognitiveKey: String = {
    sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ package com.microsoft.azure.synapse.ml.param
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.param.{Param, ParamPair, Params}
import spray.json.{DefaultJsonProtocol, JsValue, JsonFormat, _}
import org.json4s.DefaultFormats

import scala.collection.JavaConverters._

object AnyJsonFormat extends DefaultJsonProtocol {

implicit def anyFormat: JsonFormat[Any] =
implicit def anyFormat: JsonFormat[Any] = {
def throwFailure(any: Any) = throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}")

new JsonFormat[Any] {
def write(any: Any): JsValue = any match {
case v: Int => v.toJson
Expand All @@ -21,8 +22,15 @@ object AnyJsonFormat extends DefaultJsonProtocol {
case v: Boolean => v.toJson
case v: Integer => v.toLong.toJson
case v: Seq[_] => seqFormat[Any].write(v)
case v: Map[String, _] => mapFormat[String, Any].write(v)
case _ => throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}")
case v: Map[_, _] => {
try {
mapFormat[String, Any].write(v.asInstanceOf[Map[String, _]])
}
catch {
case _: SerializationException => throwFailure(any)
}
}
case _ => throwFailure(any)
}

def read(value: JsValue): Any = value match {
Expand Down
1 change: 1 addition & 0 deletions test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
New file content3