-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
This approach moves the node handling to an object algebra. This makes it relatively easy to add configuration that changes how various YAML types are handled as the need arises. One can even plug in their own algebra if they want to get nitty-gritty. It also allows adding optional error accumulation, which is something that's always felt missing. A nice thing here is that by default we don't even have to check any configuration - only when a parser instance is configured do we switch to `ConfiguredAlg`. It should also be binary compatible (and `yaml.parser.configured(...)` is a nice way to get a configured parser!). TODO: Add tests around error accumulation and such (coverage currently lacking)
- Loading branch information
1 parent
befb2b6
commit c9763fc
Showing
3 changed files
with
252 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
package io.circe.yaml.parser | ||
|
||
import cats.data.ValidatedNel | ||
import cats.instances.either._ | ||
import cats.instances.list._ | ||
import cats.syntax.either._ | ||
import cats.syntax.traverse._ | ||
import io.circe.{Json, JsonNumber, JsonObject, ParsingFailure} | ||
import org.yaml.snakeyaml.constructor.SafeConstructor | ||
import org.yaml.snakeyaml.nodes._ | ||
import scala.collection.JavaConverters._ | ||
import scala.collection.immutable.Queue | ||
|
||
/**
 * Object algebra that converts a SnakeYAML node tree into a value of type `T`.
 *
 * Implementations supply one handler per scalar tag plus the two combinators
 * [[fromValues]] and [[fromFields]]; [[any]] performs the dispatch and the
 * default [[sequence]] / [[mapping]] implementations recurse through it.
 *
 * @tparam T the result type produced for every node
 */
abstract class NodeAlg[T] {
  /** Handles a scalar tagged `Tag.INT`. */
  def int(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.FLOAT`. */
  def float(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.TIMESTAMP`. */
  def timestamp(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.BOOL`. */
  def bool(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.NULL`. */
  def yNull(node: ScalarNode): T

  /** Handles a scalar tagged `Tag.STR`. */
  def string(node: ScalarNode): T

  /** Handles a scalar whose tag is none of the above. */
  def otherScalar(node: ScalarNode): T

  /** Converts every element in document order and combines them with [[fromValues]]. */
  def sequence(node: SequenceNode): T = {
    val elements = node.getValue.asScala.foldLeft(Queue.empty[T]) { (converted, child) =>
      converted.enqueue(any(child))
    }
    fromValues(elements)
  }

  /**
   * Converts every entry and combines them with [[fromFields]].
   *
   * Throws a `ParsingFailure` when a key is not a scalar node.
   */
  def mapping(node: MappingNode): T = {
    val fields = node.getValue.asScala.map { entry =>
      entry.getKeyNode match {
        case key: ScalarNode => (key.getValue, any(entry.getValueNode))
        case _ => throw ParsingFailure("Only string keys can be represented in JSON", null)
      }
    }
    fromFields(fields)
  }

  /** Combines already-converted sequence elements into a single `T`. */
  def fromValues(ts: Iterable[T]): T

  /** Combines already-converted `(key, value)` pairs into a single `T`. */
  def fromFields(ts: Iterable[(String, T)]): T

  /** Routes a node to the handler matching its kind (and, for scalars, its tag). */
  final def any(node: Node): T = node match {
    case scalarNode: ScalarNode => scalar(scalarNode)
    case sequenceNode: SequenceNode => sequence(sequenceNode)
    case mappingNode: MappingNode => mapping(mappingNode)
  }

  // Tag-based dispatch for scalar nodes; unrecognised tags fall through to otherScalar.
  private def scalar(node: ScalarNode): T = node.getTag match {
    case Tag.INT => int(node)
    case Tag.FLOAT => float(node)
    case Tag.TIMESTAMP => timestamp(node)
    case Tag.BOOL => bool(node)
    case Tag.NULL => yNull(node)
    case Tag.STR => string(node)
    case _ => otherScalar(node)
  }
}
|
||
/**
 * Adapts a throwing [[NodeAlg]] into one that reports failures as `Left(ParsingFailure)`.
 *
 * Every handler runs the corresponding handler of `lifted` and captures non-fatal
 * exceptions. Sequences and mappings are delegated wholesale to `lifted`, so a
 * failure anywhere inside a collection surfaces as a single `Left`.
 *
 * @param lifted the underlying algebra whose handlers may throw
 */
final class LiftedAlg[A](lifted: NodeAlg[A]) extends NodeAlg[Either[ParsingFailure, A]] {
  // An exception that already is a ParsingFailure (e.g. the non-string-key error
  // thrown by NodeAlg.mapping) is passed through unchanged so its specific message
  // is not hidden behind a generic one; this matches fromValues/fromFields below.
  private def wrap(what: String)(err: Throwable): ParsingFailure = err match {
    case failure: ParsingFailure => failure
    case other => ParsingFailure(s"Failed to parse $what", other)
  }

  // Runs `convert`, turning any non-fatal exception into a Left described by `what`.
  private def attempt(what: String)(convert: => A): Either[ParsingFailure, A] =
    Either.catchNonFatal(convert).leftMap(wrap(what))

  def int(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("integer value")(lifted.int(node))

  def float(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("float value")(lifted.float(node))

  def timestamp(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("timestamp value")(lifted.timestamp(node))

  def bool(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("boolean value")(lifted.bool(node))

  def yNull(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("null value")(lifted.yNull(node))

  def string(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("string value")(lifted.string(node))

  def otherScalar(node: ScalarNode): Either[ParsingFailure, A] =
    attempt("scalar value")(lifted.otherScalar(node))

  override def sequence(node: SequenceNode): Either[ParsingFailure, A] =
    attempt("sequence")(lifted.sequence(node))

  override def mapping(node: MappingNode): Either[ParsingFailure, A] =
    attempt("mapping")(lifted.mapping(node))

  /** Combines the values with `lifted.fromValues`, or returns the first failure encountered. */
  def fromValues(ts: Iterable[Either[ParsingFailure, A]]): Either[ParsingFailure, A] =
    try {
      // A Left is rethrown so the traversal stops at the first failure; it is
      // recovered by the catch below.
      Either.right(lifted.fromValues(ts.map(_.valueOr(throw _))))
    } catch {
      case failure: ParsingFailure => Either.left(failure)
    }

  /** Combines the fields with `lifted.fromFields`, or returns the first failure encountered. */
  def fromFields(ts: Iterable[(String, Either[ParsingFailure, A])]): Either[ParsingFailure, A] =
    try {
      Either.right {
        lifted.fromFields {
          ts.map { case (key, value) => key -> value.valueOr(throw _) }
        }
      }
    } catch {
      case failure: ParsingFailure => Either.left(failure)
    }
}
|
||
/**
 * Algebra that accumulates every scalar failure into a `ValidatedNel` instead of
 * stopping at the first one.
 *
 * Scalars are converted through a [[LiftedAlg]] over `base`; collections are
 * combined applicatively so that errors from all elements are collected.
 *
 * @param base the underlying algebra producing `A` values (its handlers may throw)
 */
final class AccumlatingAlg[A](base: NodeAlg[A]) extends NodeAlg[ValidatedNel[ParsingFailure, A]] {
  private val lifted = new LiftedAlg(base)

  def int(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.int(node).toValidatedNel
  def float(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.float(node).toValidatedNel
  def timestamp(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.timestamp(node).toValidatedNel
  def bool(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.bool(node).toValidatedNel
  def yNull(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.yNull(node).toValidatedNel
  def string(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.string(node).toValidatedNel
  def otherScalar(node: ScalarNode): ValidatedNel[ParsingFailure, A] = lifted.otherScalar(node).toValidatedNel

  /**
   * Converts a mapping, reporting a non-scalar key as an invalid result.
   *
   * The inherited implementation throws a `ParsingFailure` for such keys; that
   * exception is captured here so callers always receive a `ValidatedNel`.
   */
  override def mapping(node: MappingNode): ValidatedNel[ParsingFailure, A] =
    try super.mapping(node)
    catch {
      case failure: ParsingFailure => Either.left[ParsingFailure, A](failure).toValidatedNel
    }

  /** Builds the result from the fields when all are valid, otherwise collects every failure. */
  def fromFields(ts: Iterable[(String, ValidatedNel[ParsingFailure, A])]): ValidatedNel[ParsingFailure, A] =
    ts.toList.traverseU {
      case (key, value) => value.map(key -> _)
    }.map(base.fromFields)

  /** Builds the result from the values when all are valid, otherwise collects every failure. */
  def fromValues(ts: Iterable[ValidatedNel[ParsingFailure, A]]): ValidatedNel[ParsingFailure, A] =
    ts.toList.sequenceU.map(base.fromValues)
}
|
||
/**
 * Default conversion from YAML nodes to circe [[Json]].
 *
 * Handlers throw on malformed input; wrap this algebra (e.g. in `LiftedAlg`) or
 * call it inside an exception-capturing block to obtain failures as values.
 */
class DefaultAlg extends NodeAlg[Json] {
  /** Exposes SnakeYAML's protected merge-key (`<<`) flattening. */
  protected object Constructor extends SafeConstructor {
    def flatten(node: MappingNode): Unit = flattenMapping(node)
  }

  /**
   * Parses a numeric literal into a JSON number.
   *
   * Throws `NumberFormatException` when `str` is not a valid JSON number.
   */
  final protected def number(str: String): Json =
    JsonNumber.fromString(str) match {
      case Some(parsed) => Json.fromJsonNumber(parsed)
      case None => throw new NumberFormatException(s"Invalid numeric string $str")
    }

  def int(node: ScalarNode): Json = number(node.getValue)
  def float(node: ScalarNode): Json = number(node.getValue)
  def timestamp(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * Converts a boolean scalar.
   *
   * Besides `true`/`false`, the spellings `yes`/`no` and `on`/`off` are accepted
   * (case-insensitively), since SnakeYAML's resolver tags those as booleans too
   * and `String.toBoolean` would reject them. Throws `IllegalArgumentException`
   * for any other text.
   */
  def bool(node: ScalarNode): Json =
    node.getValue.toLowerCase(java.util.Locale.ROOT) match {
      case "true" | "yes" | "on" => Json.fromBoolean(true)
      case "false" | "no" | "off" => Json.fromBoolean(false)
      case _ => throw new IllegalArgumentException(s"Invalid boolean string ${node.getValue}")
    }

  def yNull(node: ScalarNode): Json = Json.Null
  def string(node: ScalarNode): Json = Json.fromString(node.getValue)

  /**
   * Converts a scalar with an unrecognised tag: a custom tag (one that does not
   * start with `Tag.PREFIX`) becomes a single-field object keyed by the tag name
   * without its leading `!`; any other tag is rendered as a plain string.
   */
  def otherScalar(node: ScalarNode): Json =
    if (node.getTag.startsWith(Tag.PREFIX)) Json.fromString(node.getValue)
    else {
      val tagName = node.getTag.getValue.stripPrefix("!")
      Json.fromJsonObject(JsonObject.singleton(tagName, Json.fromString(node.getValue)))
    }

  /**
   * Converts a mapping after flattening merge keys, so that `<<` entries are
   * expanded into the mapping instead of appearing as a literal `<<` field.
   */
  override def mapping(node: MappingNode): Json = {
    Constructor.flatten(node)
    super.mapping(node)
  }

  def fromValues(ts: Iterable[Json]): Json = Json.fromValues(ts)
  def fromFields(ts: Iterable[(String, Json)]): Json = Json.fromFields(ts)
}
|
||
/**
 * [[DefaultAlg]] variant whose behaviour is adjusted by configuration flags.
 *
 * @param numericTimestamps when true, timestamp scalars are emitted as epoch
 *                          milliseconds; otherwise the default handling applies
 */
case class ConfiguredAlg(
  numericTimestamps: Boolean
) extends DefaultAlg {
  final override def timestamp(node: ScalarNode): Json =
    if (numericTimestamps) {
      // Let SnakeYAML parse the timestamp, then read the resulting calendar.
      val timestampConstructor = new SafeConstructor.ConstructYamlTimestamp()
      timestampConstructor.construct(node)
      Json.fromLong(timestampConstructor.getCalendar.getTimeInMillis)
    } else {
      super.timestamp(node)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
package io.circe.yaml.parser | ||
|
||
|
||
import cats.data.ValidatedNel | ||
import cats.syntax.either._ | ||
import io.circe._ | ||
import java.io.{Reader, StringReader} | ||
import org.yaml.snakeyaml.Yaml | ||
import scala.collection.JavaConverters._ | ||
|
||
/**
 * YAML parser producing circe [[Json]].
 *
 * @param algebra the node algebra used to convert SnakeYAML nodes; its handlers may throw
 */
class Parser(algebra: NodeAlg[Json] = new DefaultAlg) {

  /**
   * Configure the parser
   * @param numericTimestamps if true, timestamps will be returned as epoch millisecond [[Long]]s
   * @return A configured parser
   */
  def configured(
    numericTimestamps: Boolean = false
  ): Parser = new Parser(ConfiguredAlg(
    numericTimestamps = numericTimestamps
  ))

  /**
   * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] = for {
    parsed <- parseSingle(yaml)
    json <- Either.catchNonFatal(algebra.any(parsed)).leftMap {
      // Keep a failure raised by the algebra as-is; wrap anything else.
      case p @ ParsingFailure(_, _) => p
      case err => ParsingFailure(err.getMessage, err)
    }
  } yield json

  /**
   * Parse YAML from the given [[Reader]], accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   */
  def parseAccumulating(yaml: Reader): ValidatedNel[ParsingFailure, Json] =
    // Validated has no flatMap, so the composition step is chained with andThen:
    // a composition failure short-circuits, otherwise conversion errors accumulate.
    parseSingle(yaml).toValidatedNel.andThen(node => new AccumlatingAlg(algebra).any(node))

  /**
   * Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]]
   */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /**
   * Parse YAML from the given string, accumulating errors and returning either a list of [[ParsingFailure]]s
   * or a [[Json]]
   */
  def parseAccumulating(yaml: String): ValidatedNel[ParsingFailure, Json] = parseAccumulating(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given [[Reader]], returning the result as a [[Stream]] of [[Either]]
   */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] = {
    val alg = new LiftedAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
   * Parse a succession of documents from the given [[Reader]], accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]]
   */
  def parseDocumentsAccumulating(yaml: Reader): Stream[ValidatedNel[ParsingFailure, Json]] = {
    val alg = new AccumlatingAlg(algebra)
    parseStream(yaml).map(alg.any)
  }

  /**
   * Parse a succession of documents from the given string, returning the result as a [[Stream]] of [[Either]]
   */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  /**
   * Parse a succession of documents from the given string, accumulating errors within each document and
   * returning the result as a [[Stream]] of [[ValidatedNel]]
   */
  def parseDocumentsAccumulating(yaml: String): Stream[ValidatedNel[ParsingFailure, Json]] =
    parseDocumentsAccumulating(new StringReader(yaml))

  // Composes the first document into a node tree, capturing non-fatal composition errors.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  // Composes every document in the input; composition errors are not captured here.
  private[this] def parseStream(reader: Reader) =
    new Yaml().composeAll(reader).asScala.toStream

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,94 +1,3 @@ | ||
package io.circe.yaml | ||
|
||
import cats.syntax.either._ | ||
import io.circe._ | ||
import java.io.{Reader, StringReader} | ||
import org.yaml.snakeyaml.Yaml | ||
import org.yaml.snakeyaml.constructor.SafeConstructor | ||
import org.yaml.snakeyaml.nodes._ | ||
import scala.collection.JavaConverters._ | ||
|
||
package object parser {

  /**
   * Parse YAML from the given [[Reader]], returning either [[ParsingFailure]] or [[Json]]
   * @param yaml the source to read a single YAML document from
   * @return the converted document, or the failure that prevented conversion
   */
  def parse(yaml: Reader): Either[ParsingFailure, Json] =
    parseSingle(yaml).flatMap(yamlToJson)

  /** Parse YAML from the given string, returning either [[ParsingFailure]] or [[Json]] */
  def parse(yaml: String): Either[ParsingFailure, Json] = parse(new StringReader(yaml))

  /** Parse every document read from the given [[Reader]], one result per document */
  def parseDocuments(yaml: Reader): Stream[Either[ParsingFailure, Json]] =
    parseStream(yaml).map(document => yamlToJson(document))

  /** Parse every document contained in the given string, one result per document */
  def parseDocuments(yaml: String): Stream[Either[ParsingFailure, Json]] = parseDocuments(new StringReader(yaml))

  // Composes the first document, capturing non-fatal composition errors.
  private[this] def parseSingle(reader: Reader) =
    Either.catchNonFatal(new Yaml().compose(reader)).leftMap(err => ParsingFailure(err.getMessage, err))

  // Composes all documents in the input as a Stream.
  private[this] def parseStream(reader: Reader) =
    new Yaml().composeAll(reader).asScala.toStream

  // Extracts the raw tag value of tags that do not start with Tag.PREFIX.
  private[this] object CustomTag {
    def unapply(tag: Tag): Option[String] =
      if (tag.startsWith(Tag.PREFIX)) None else Some(tag.getValue)
  }

  // Gives access to SafeConstructor's protected merge-key flattening.
  private[this] class FlatteningConstructor extends SafeConstructor {
    def flatten(node: MappingNode): MappingNode = {
      flattenMapping(node)
      node
    }
  }

  private[this] val flattener: FlatteningConstructor = new FlatteningConstructor

  // Recursively converts a node tree to Json, stopping at the first failure.
  private[this] def yamlToJson(node: Node): Either[ParsingFailure, Json] = {

    def scalarToJson(scalar: ScalarNode): Either[ParsingFailure, Json] =
      Either.catchNonFatal {
        scalar.getTag match {
          case Tag.INT | Tag.FLOAT =>
            JsonNumber.fromString(scalar.getValue) match {
              case Some(number) => Json.fromJsonNumber(number)
              case None => throw new NumberFormatException(s"Invalid numeric string ${scalar.getValue}")
            }
          case Tag.BOOL => Json.fromBoolean(scalar.getValue.toBoolean)
          case Tag.NULL => Json.Null
          case CustomTag(tagValue) =>
            Json.fromJsonObject(JsonObject.singleton(tagValue.stripPrefix("!"), Json.fromString(scalar.getValue)))
          case _ => Json.fromString(scalar.getValue)
        }
      }.leftMap(err => ParsingFailure(err.getMessage, err))

    def keyOf(keyNode: Node): Either[ParsingFailure, String] = keyNode match {
      case scalar: ScalarNode => Right(scalar.getValue)
      case _ => Left(ParsingFailure("Only string keys can be represented in JSON", null))
    }

    node match {
      case mapping: MappingNode =>
        val entries = flattener.flatten(mapping).getValue.asScala
        val emptyObject = Either.right[ParsingFailure, JsonObject](JsonObject.empty)
        entries.foldLeft(emptyObject) { (soFar, entry) =>
          soFar.flatMap { obj =>
            keyOf(entry.getKeyNode).flatMap { key =>
              yamlToJson(entry.getValueNode).map(value => obj.add(key, value))
            }
          }
        }.map(Json.fromJsonObject)
      case sequence: SequenceNode =>
        val noItems = Either.right[ParsingFailure, Vector[Json]](Vector.empty[Json])
        sequence.getValue.asScala.foldLeft(noItems) { (soFar, element) =>
          soFar.flatMap(items => yamlToJson(element).map(items :+ _))
        }.map(items => Json.fromValues(items))
      case scalar: ScalarNode => scalarToJson(scalar)
    }
  }

}
/**
 * Package-level parser entry point: the package object is itself a [[Parser]]
 * built with the default constructor arguments, so the methods of [[Parser]] are
 * callable directly on `io.circe.yaml.parser`.
 */
package object parser extends Parser