Skip to content

Commit

Permalink
An alternative approach to #31 (see also #33)
Browse files Browse the repository at this point in the history
This approach moves the node handling to an object algebra. This makes it
relatively easy to add configuration options that change how various YAML
types are handled as the need arises. One can even plug in their own
algebra if they want to get nitty-gritty.

It also allows adding optional error accumulation, which is something
that's always felt missing.

A nice thing here is by default we don't have to even check any
configuration - only when a parser instance is configured do we change
to `ConfiguredAlg`.

It should also be binary compatible (and `yaml.parser.configured(...)` is a
nice way to get a configured parser!).

TODO: Add tests around error accumulation and such (coverage currently
lacking)
  • Loading branch information
jeremyrsmith committed Jun 10, 2017
1 parent befb2b6 commit c9763fc
Show file tree
Hide file tree
Showing 3 changed files with 252 additions and 92 deletions.
160 changes: 160 additions & 0 deletions src/main/scala/io/circe/yaml/parser/NodeAlg.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package io.circe.yaml.parser

import cats.data.ValidatedNel
import cats.instances.either._
import cats.instances.list._
import cats.syntax.either._
import cats.syntax.traverse._
import io.circe.{Json, JsonNumber, JsonObject, ParsingFailure}
import org.yaml.snakeyaml.constructor.SafeConstructor
import org.yaml.snakeyaml.nodes._
import scala.collection.JavaConverters._
import scala.collection.immutable.Queue

/**
 * Object algebra describing how each kind of YAML node is interpreted as a `T`.
 *
 * Implementations supply the scalar handlers plus the two combinators [[fromValues]] and
 * [[fromFields]]; [[sequence]] and [[mapping]] have default implementations built on them, and
 * [[any]] is the single, final entry point that dispatches on the node's class and tag.
 *
 * @tparam T the result produced for every node
 */
abstract class NodeAlg[T] {
  /** Interprets a scalar tagged `Tag.INT`. */
  def int(node: ScalarNode): T

  /** Interprets a scalar tagged `Tag.FLOAT`. */
  def float(node: ScalarNode): T

  /** Interprets a scalar tagged `Tag.TIMESTAMP`. */
  def timestamp(node: ScalarNode): T

  /** Interprets a scalar tagged `Tag.BOOL`. */
  def bool(node: ScalarNode): T

  /** Interprets a scalar tagged `Tag.NULL`. */
  def yNull(node: ScalarNode): T

  /** Interprets a scalar tagged `Tag.STR`. */
  def string(node: ScalarNode): T

  /** Interprets a scalar whose tag is none of the tags handled above. */
  def otherScalar(node: ScalarNode): T

  /**
   * Interprets every child of the sequence, in order, and combines the results with [[fromValues]].
   */
  def sequence(node: SequenceNode): T = {
    val interpreted = node.getValue.asScala.iterator
      .map(child => any(child))
      .foldLeft(Queue.empty[T])((done, value) => done.enqueue(value))
    fromValues(interpreted)
  }

  /**
   * Interprets every entry of the mapping and combines the results with [[fromFields]].
   *
   * Only scalar key nodes are accepted; their raw value becomes the field name. Any other kind of
   * key node makes this method throw a [[ParsingFailure]].
   */
  def mapping(node: MappingNode): T = {
    val fields = for (entry <- node.getValue.asScala) yield {
      entry.getKeyNode match {
        case key: ScalarNode => (key.getValue, any(entry.getValueNode))
        case _ => throw ParsingFailure("Only string keys can be represented in JSON", null)
      }
    }
    fromFields(fields)
  }

  /** Combines the already-interpreted elements of a sequence into a single result. */
  def fromValues(ts: Iterable[T]): T

  /** Combines the already-interpreted `(key, value)` entries of a mapping into a single result. */
  def fromFields(ts: Iterable[(String, T)]): T

  /**
   * Interprets an arbitrary node by dispatching to the handler for its kind.
   *
   * Only scalar, sequence and mapping nodes are matched; there is no fallback case for anything else.
   */
  final def any(node: Node): T = node match {
    case scalar: ScalarNode => scalarByTag(scalar)
    case items: SequenceNode => sequence(items)
    case entries: MappingNode => mapping(entries)
  }

  // Chooses the scalar handler from the node's tag; unrecognised tags go to `otherScalar`.
  private def scalarByTag(node: ScalarNode): T = node.getTag match {
    case Tag.INT => int(node)
    case Tag.FLOAT => float(node)
    case Tag.TIMESTAMP => timestamp(node)
    case Tag.BOOL => bool(node)
    case Tag.NULL => yNull(node)
    case Tag.STR => string(node)
    case _ => otherScalar(node)
  }
}

/**
 * Wraps a [[NodeAlg]] whose handlers may throw so that every handler returns an `Either` instead.
 *
 * Each node handler runs the corresponding handler of `lifted` and converts any non-fatal
 * exception into a [[ParsingFailure]] whose message names the kind of node being parsed and whose
 * underlying error is the original exception.
 *
 * @param lifted the algebra whose results are lifted into `Either[ParsingFailure, A]`
 */
final class LiftedAlg[A](lifted: NodeAlg[A]) extends NodeAlg[Either[ParsingFailure, A]] {

  // Evaluates `body`, reporting a non-fatal exception as "Failed to parse <what>".
  private def attempt(what: String)(body: => A): Either[ParsingFailure, A] =
    Either.catchNonFatal(body).leftMap(err => ParsingFailure(s"Failed to parse $what", err))

  def int(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("integer value")(lifted.int(node))

  def float(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("float value")(lifted.float(node))

  def timestamp(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("timestamp value")(lifted.timestamp(node))

  def bool(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("boolean value")(lifted.bool(node))

  def yNull(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("null value")(lifted.yNull(node))

  def string(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("string value")(lifted.string(node))

  def otherScalar(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("scalar value")(lifted.otherScalar(node))

  // The whole sequence is handed to the wrapped algebra, so one failure is reported per call.
  override def sequence(node: SequenceNode): Either[ParsingFailure, A] =
    attempt("sequence")(lifted.sequence(node))

  // The whole mapping is handed to the wrapped algebra, so one failure is reported per call.
  override def mapping(node: MappingNode): Either[ParsingFailure, A] =
    attempt("mapping")(lifted.mapping(node))

  /**
   * Combines element results with the wrapped algebra. A `Left` element is rethrown while the
   * elements are unwrapped and handed back as the overall `Left`; other exceptions are not caught.
   */
  def fromValues(ts: Iterable[Either[ParsingFailure, A]]): Either[ParsingFailure, A] =
    try {
      Either.right {
        lifted.fromValues {
          ts.map {
            case Right(value) => value
            case Left(failure) => throw failure
          }
        }
      }
    } catch {
      case failure: ParsingFailure => Either.left(failure)
    }

  /**
   * Combines field results with the wrapped algebra. A `Left` value is rethrown while the fields
   * are unwrapped and handed back as the overall `Left`; other exceptions are not caught.
   */
  def fromFields(ts: Iterable[(String, Either[ParsingFailure, A])]): Either[ParsingFailure, A] =
    try {
      Either.right {
        lifted.fromFields {
          ts.map {
            case (key, Right(value)) => key -> value
            case (_, Left(failure)) => throw failure
          }
        }
      }
    } catch {
      case failure: ParsingFailure => Either.left(failure)
    }
}

/**
 * Interprets nodes with `base` while accumulating every [[ParsingFailure]] instead of stopping at
 * the first one.
 *
 * Scalars are interpreted through a [[LiftedAlg]] over `base`, so exceptions thrown by `base`
 * become failures. Sequences use the inherited [[NodeAlg.sequence]]; mappings are overridden here
 * so that a non-scalar key is recorded as a failure rather than thrown.
 *
 * The spelling of the class name is kept as-is because it is part of the public interface.
 *
 * @param base the algebra that interprets individual nodes and combines successful results
 */
final class AccumlatingAlg[A](base: NodeAlg[A]) extends NodeAlg[ValidatedNel[ParsingFailure, A]] {
  private val lifted = new LiftedAlg(base)

  def int(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.int(node).toValidatedNel
  def float(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.float(node).toValidatedNel
  def timestamp(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.timestamp(node).toValidatedNel
  def bool(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.bool(node).toValidatedNel
  def yNull(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.yNull(node).toValidatedNel
  def string(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.string(node).toValidatedNel
  def otherScalar(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.otherScalar(node).toValidatedNel

  /**
   * Interprets every entry of the mapping, accumulating failures from both keys and values.
   *
   * The inherited implementation throws a [[ParsingFailure]] for a non-scalar key, which would
   * escape this algebra as an exception; here the same failure is returned as an invalid result
   * and combined with any failures found in the other entries.
   */
  override def mapping(node: MappingNode): ValidatedNel[ParsingFailure, A] =
    node.getValue.asScala.toList.traverseU(entry => field(entry)).map(base.fromFields)

  // Interprets one mapping entry; a key that is not a scalar yields the same failure the base class throws.
  private def field(entry: NodeTuple): ValidatedNel[ParsingFailure, (String, A)] =
    entry.getKeyNode match {
      case key: ScalarNode =>
        any(entry.getValueNode).map(key.getValue -> _)
      case _ =>
        Either.left[ParsingFailure, (String, A)](
          ParsingFailure("Only string keys can be represented in JSON", null)
        ).toValidatedNel
    }

  /** Combines field results with `base.fromFields` when all are valid, otherwise accumulates the failures. */
  def fromFields(ts: Iterable[(String, ValidatedNel[ParsingFailure, A])]): ValidatedNel[ParsingFailure, A] =
    ts.toList.traverseU {
      case (key, value) => value.map(key -> _)
    }.map(base.fromFields)

  /** Combines element results with `base.fromValues` when all are valid, otherwise accumulates the failures. */
  def fromValues(ts: Iterable[ValidatedNel[ParsingFailure, A]]): ValidatedNel[ParsingFailure, A] =
    ts.toList.sequenceU.map(base.fromValues)
}

/**
 * The default interpretation of YAML nodes as circe [[Json]].
 *
 * Handlers throw on invalid input (for example a numeric scalar that is not a valid JSON number);
 * callers are expected to catch those exceptions or wrap this algebra.
 */
class DefaultAlg extends NodeAlg[Json] {
  /** Gives access to `SafeConstructor.flattenMapping`, which is called on mappings before conversion. */
  protected object Constructor extends SafeConstructor {
    def flatten(node: MappingNode): Unit = flattenMapping(node)
  }

  /**
   * Parses `str` as a JSON number.
   *
   * @throws NumberFormatException if `str` is not a valid JSON number
   */
  final protected def number(str: String): Json = JsonNumber.fromString(str).map(Json.fromJsonNumber).getOrElse {
    throw new NumberFormatException(s"Invalid numeric string $str")
  }

  def int(node: ScalarNode): Json = number(node.getValue)
  def float(node: ScalarNode): Json = number(node.getValue)

  /** Timestamps are kept as their raw string value. */
  def timestamp(node: ScalarNode): Json = Json.fromString(node.getValue)
  def bool(node: ScalarNode): Json = Json.fromBoolean(node.getValue.toBoolean)
  def yNull(node: ScalarNode): Json = Json.Null
  def string(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * A scalar whose tag does not start with `Tag.PREFIX` becomes a single-field object keyed by the
   * tag value (without a leading "!"); any other scalar becomes a plain string.
   */
  def otherScalar(node: ScalarNode): Json = if (!node.getTag.startsWith(Tag.PREFIX)) {
    Json.fromJsonObject(JsonObject.singleton(node.getTag.getValue.stripPrefix("!"), Json.fromString(node.getValue)))
  } else Json.fromString(node.getValue)

  /**
   * Flattens the mapping with `SafeConstructor.flattenMapping` (SnakeYAML's merge-key handling)
   * before converting its entries. Without this step the `Constructor` helper above is never used
   * and mappings are converted unflattened.
   */
  override def mapping(node: MappingNode): Json = {
    Constructor.flatten(node)
    super.mapping(node)
  }

  def fromValues(ts: Iterable[Json]): Json = Json.fromValues(ts)
  def fromFields(ts: Iterable[(String, Json)]): Json = Json.fromFields(ts)
}

/**
 * A [[DefaultAlg]] whose behaviour can be adjusted through configuration flags.
 *
 * @param numericTimestamps when true, timestamp scalars are emitted as epoch milliseconds instead
 *                          of their raw string value
 */
case class ConfiguredAlg(
  numericTimestamps: Boolean
) extends DefaultAlg {
  final override def timestamp(node: ScalarNode): Json =
    if (numericTimestamps) {
      // Let SnakeYAML parse the timestamp, then read the calendar it produced.
      val timestampConstruct = new SafeConstructor.ConstructYamlTimestamp()
      timestampConstruct.construct(node)
      Json.fromLong(timestampConstruct.getCalendar.getTimeInMillis)
    } else {
      super.timestamp(node)
    }
}
91 changes: 91 additions & 0 deletions src/main/scala/io/circe/yaml/parser/Parser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package io.circe.yaml.parser


import cats.data.ValidatedNel
import cats.syntax.either._
import io.circe._
import java.io.{Reader, StringReader}
import org.yaml.snakeyaml.Yaml
import scala.collection.JavaConverters._

/**
 * Parses YAML into circe [[Json]] by composing SnakeYAML nodes and interpreting them with a
 * [[NodeAlg]].
 *
 * @param algebra the algebra used to turn YAML nodes into [[Json]]; defaults to a new [[DefaultAlg]]
 */
class Parser(algebra: NodeAlg[Json] = new DefaultAlg) {

  /**
    * Configure the parser. The returned parser always uses a [[ConfiguredAlg]] built from the
    * given options, regardless of the algebra this parser was created with.
    *
    * @param numericTimestamps if true, timestamps will be returned as epoch millisecond [[Long]]s
    * @return A configured parser
    */
  def configured(
    numericTimestamps: Boolean = false
  ): Parser = new Parser(ConfiguredAlg(
    numericTimestamps = numericTimestamps
  ))


  /**
    * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
    */
  def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
    parsed <- parseSingle(yaml)
    json <- Either.catchNonFatal(algebra.any(parsed)).leftMap {
      // A ParsingFailure thrown by the algebra is returned unchanged; anything else is wrapped.
      case p @ ParsingFailure(_, _) => p
      case err => ParsingFailure(err.getMessage, err)
    }
  } yield json

  /**
    * Parse YAML from the given [[Reader]], accumulating errors and returning either a list of [[ParsingFailure]]s
    * or a [[Json]].
    *
    * A failure to compose the document is reported as a single-element error list.
    */
  def parseAccumulating(yaml: Reader): ValidatedNel[ParsingFailure, Json] =
    // Either and Validated cannot share one for-comprehension, so branch on the composed node explicitly.
    parseSingle(yaml) match {
      case Right(node) => new AccumlatingAlg(algebra).any(node)
      case Left(failure) => Either.left[ParsingFailure, Json](failure).toValidatedNel
    }

  /**
    * Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]]
    */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /**
    * Parse YAML from the given string, accumulating errors and returning either a list of [[ParsingFailure]]s
    * or a [[Json]]
    */
  def parseAccumulating(yaml: String): ValidatedNel[ParsingFailure, Json] = parseAccumulating(new StringReader(yaml))

  /**
    * Parse a succession of documents from the given [[Reader]], returning the result as a [[Stream]] of [[Either]]
    */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = {
    val alg = new LiftedAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
    * Parse a succession of documents from the given [[Reader]], accumulating errors within each document and
    * returning the result as a [[Stream]] of [[ValidatedNel]]
    */
  def parseDocumentsAccumulating(yaml: Reader): Stream[ValidatedNel[ParsingFailure, Json]] = {
    val alg = new AccumlatingAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
    * Parse a succession of documents from the given string, returning the result as a [[Stream]] of [[Either]]
    */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  /**
    * Parse a succession of documents from the given string, accumulating errors within each document and
    * returning the result as a [[Stream]] of [[ValidatedNel]]
    */
  def parseDocumentsAccumulating(yaml: String): Stream[ValidatedNel[ParsingFailure, Json]] =
    parseDocumentsAccumulating(new StringReader(yaml))

  // Composes a single document; a non-fatal exception from SnakeYAML becomes a ParsingFailure.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  // Composes all documents as a Stream. Exceptions thrown by SnakeYAML are not converted here.
  private[this] def parseStream(reader: Reader) =
    new Yaml().composeAll(reader).asScala.toStream

}
93 changes: 1 addition & 92 deletions src/main/scala/io/circe/yaml/parser/package.scala
Original file line number Diff line number Diff line change
@@ -1,94 +1,3 @@
package io.circe.yaml

import cats.syntax.either._
import io.circe._
import java.io.{Reader, StringReader}
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.constructor.SafeConstructor
import org.yaml.snakeyaml.nodes._
import scala.collection.JavaConverters._

// Previous implementation of the package object (the side of the diff that this commit removes):
// all parsing logic lives directly in the package object.
package object parser {


/**
* Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
* @param yaml the reader supplying the YAML text; a single document is composed from it
* @return the converted [[Json]], or the [[ParsingFailure]] from composing or converting the document
*/
def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
parsed <- parseSingle(yaml)
json <- yamlToJson(parsed)
} yield json

// String overload: wraps the text in a StringReader and delegates to the Reader overload.
def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

// Converts every document composed from the input, one Either per document.
def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = parseStream(yaml).map(yamlToJson)
def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

// Composes a single document; a non-fatal exception becomes a ParsingFailure carrying its message.
private[this] def parseSingle(reader: Reader) =
Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

// Composes all documents as a Stream; exceptions are not converted to ParsingFailure here.
private[this] def parseStream(reader: Reader) =
new Yaml().composeAll(reader).asScala.toStream

// Extractor that yields the tag's value only for tags that do not start with Tag.PREFIX.
private[this] object CustomTag {
def unapply(tag: Tag): Option[String] = if (!tag.startsWith(Tag.PREFIX))
Some(tag.getValue)
else
None
}

// SafeConstructor subclass used only to call flattenMapping on a node and hand the same node back.
// NOTE(review): flattenMapping presumably expands YAML merge keys - confirm against SnakeYAML docs.
private[this] class FlatteningConstructor extends SafeConstructor {
def flatten(node: MappingNode): MappingNode = {
flattenMapping(node)
node
}
}

private[this] val flattener: FlatteningConstructor = new FlatteningConstructor

// Recursively converts a composed YAML node into Json, returning a Left on failure.
private[this] def yamlToJson(node: Node): Either[ParsingFailure, Json] = {

// Converts a scalar by tag; exceptions thrown during conversion become ParsingFailures.
def convertScalarNode(node: ScalarNode) = Either.catchNonFatal(node.getTag match {
case Tag.INT | Tag.FLOAT => JsonNumber.fromString(node.getValue).map(Json.fromJsonNumber).getOrElse {
throw new NumberFormatException(s"Invalid numeric string ${node.getValue}")
}
case Tag.BOOL => Json.fromBoolean(node.getValue.toBoolean)
case Tag.NULL => Json.Null
// Custom tags become a single-field object keyed by the tag value without a leading "!".
case CustomTag(other) =>
Json.fromJsonObject(JsonObject.singleton(other.stripPrefix("!"), Json.fromString(node.getValue)))
// Every remaining tag is kept as a plain string.
case other => Json.fromString(node.getValue)
}).leftMap {
err =>
ParsingFailure(err.getMessage, err)
}

// Only scalar nodes are accepted as mapping keys; their raw value is the JSON field name.
def convertKeyNode(node: Node) = node match {
case scalar: ScalarNode => Right(scalar.getValue)
case _ => Left(ParsingFailure("Only string keys can be represented in JSON", null))
}

node match {
// Mappings are flattened first, then folded into a JsonObject; once a Left appears it is carried through.
case mapping: MappingNode =>
flattener.flatten(mapping).getValue.asScala.foldLeft(
Either.right[ParsingFailure, JsonObject](JsonObject.empty)
) {
(objEither, tup) => for {
obj <- objEither
key <- convertKeyNode(tup.getKeyNode)
value <- yamlToJson(tup.getValueNode)
} yield obj.add(key, value)
}.map(Json.fromJsonObject)
// Sequence elements are prepended while folding, so the list is reversed before building the array.
case sequence: SequenceNode =>
sequence.getValue.asScala.foldLeft(Either.right[ParsingFailure, List[Json]](List.empty[Json])) {
(arrEither, node) => for {
arr <- arrEither
value <- yamlToJson(node)
} yield value :: arr
}.map(arr => Json.fromValues(arr.reverse))
case scalar: ScalarNode => convertScalarNode(scalar)
}
}

}
// New definition of the package object: it extends Parser (constructed with its default algebra),
// so the parsing methods are inherited from the Parser class instead of being defined here.
package object parser extends Parser

0 comments on commit c9763fc

Please sign in to comment.