From 1c07b81900d07d8dd910ec3b8ca7a4df57fc5ddc Mon Sep 17 00:00:00 2001 From: Olga Bachishe Date: Wed, 24 Apr 2024 15:38:40 +0300 Subject: [PATCH] Add ast extraction and writing in dot --- .../kotlin/org/ucfs/IGeneratorFromGrammar.kt | 9 ++- .../main/kotlin/org/ucfs/ast/AstExtractor.kt | 60 ++++++++++++--- .../src/main/kotlin/org/ucfs/ast/DotWriter.kt | 74 +++++++++++++++++++ .../src/main/kotlin/org/ucfs/ast/Node.kt | 11 ++- .../org/ucfs/ast/NodeClassesGenerator.kt | 3 +- .../main/kotlin/org/ucfs/ast/TerminalNode.kt | 10 ++- .../main/kotlin/org/ucfs/examples/Examples.kt | 29 ++++++++ .../DyckGrammar.kt} | 15 ++-- .../org/ucfs/examples/golang/SimpleGolang.kt | 32 ++++++++ .../org/ucfs/parser/IParserGenerator.kt | 3 +- .../combinator/extension/StringExtension.kt | 2 + .../grammar/combinator/regexp/Alternative.kt | 5 -- 12 files changed, 217 insertions(+), 36 deletions(-) create mode 100644 generator/src/main/kotlin/org/ucfs/ast/DotWriter.kt create mode 100644 generator/src/main/kotlin/org/ucfs/examples/Examples.kt rename generator/src/main/kotlin/org/ucfs/examples/{AstGeneratorExample.kt => dyck/DyckGrammar.kt} (63%) create mode 100644 generator/src/main/kotlin/org/ucfs/examples/golang/SimpleGolang.kt diff --git a/generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt b/generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt index ef00b3a44..7b246e559 100644 --- a/generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt +++ b/generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt @@ -1,10 +1,9 @@ package org.ucfs -import com.squareup.kotlinpoet.AnnotationSpec -import com.squareup.kotlinpoet.ClassName -import com.squareup.kotlinpoet.FileSpec +import com.squareup.kotlinpoet.* import org.ucfs.grammar.combinator.Grammar import java.nio.file.Path +import kotlin.reflect.KClass /** * Common logic for generators that use a Grammar class @@ -41,4 +40,6 @@ internal fun FileSpec.Builder.suppressWarningTypes(vararg types: String) { .addMember(format, *types) .build() ) -} \ No newline at end of file +} + +fun TypeName.nullable(): TypeName = this.copy(nullable = true) diff --git a/generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt b/generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt index 4a19a3fe6..8b541a6c2 100644 --- a/generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt +++ b/generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt @@ -1,24 +1,66 @@ package org.ucfs.ast -import org.ucfs.sppf.node.ISppfNode -import org.ucfs.sppf.node.IntermediateSppfNode -import org.ucfs.sppf.node.PackedSppfNode -import org.ucfs.sppf.node.SymbolSppfNode +import org.ucfs.GeneratorException +import org.ucfs.rsm.symbol.Nonterminal +import org.ucfs.sppf.node.* -object AstExtractor { - fun extract(sppf: ISppfNode): Node { +class AstExtractor(val pkg: String) { + val nonterminalToClass = HashMap>() + fun extract(sppf: ISppfNode?): Node { + val root = Node(null, 0) + extract(sppf, root, null) + return root.children.firstOrNull() ?: root + } + private fun getOffset(left: Node?, parent: Node): Int { + return if (left == null) { + parent.offset + } else { + left.offset + left.length + } + } + + /** + * return rightest node of subtree + */ + private fun extract(sppf: ISppfNode?, parent: Node, left: Node?): Node? { when (sppf) { is PackedSppfNode<*> -> { + val newLeft = extract(sppf.leftSppfNode, parent, left) + return extract(sppf.rightSppfNode, parent, newLeft) + } + is IntermediateSppfNode<*> -> { + return extract(sppf.children.firstOrNull(), parent, left) } - is IntermediateSppfNode<*> -> TODO() is SymbolSppfNode<*> -> { - val className = NodeClassesGenerator.getClassName(sppf.symbol) + val nodeClass = getNodeClass(sppf.symbol) + val ctor = nodeClass.getConstructor(Node::class.java, Int::class.java) + val node: Node = ctor.newInstance(parent, getOffset(left, parent)) as Node + node.left = left + parent.children.add(node) + val packedNode: PackedSppfNode<*> = sppf.children.first() + extract(packedNode, node, null) + parent.length += node.length + return node } + + is TerminalSppfNode<*> -> { + val node = TerminalNode(parent, getOffset(left, parent), sppf.terminal, left) + parent.children.add(node) + parent.length += sppf.terminal.toString().length + return node + } + + null -> return null + else -> throw GeneratorException("Unknown sppf node type : $sppf") } - TODO() + } + + private fun getNodeClass(nt: Nonterminal): Class<*> { + return nonterminalToClass.getOrPut(nt) + { Class.forName("$pkg.${NodeClassesGenerator.getClassName(nt)}") } } } \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/ast/DotWriter.kt b/generator/src/main/kotlin/org/ucfs/ast/DotWriter.kt new file mode 100644 index 000000000..c0872f1d1 --- /dev/null +++ b/generator/src/main/kotlin/org/ucfs/ast/DotWriter.kt @@ -0,0 +1,74 @@ +package org.ucfs.ast + +import java.nio.file.Files +import java.nio.file.Path + +class DotWriter { + private var lastId = 0 + private val showSiblings = false + val ids: HashMap = HashMap() + fun getId(node: Node): Int { + return ids.getOrPut(node) { lastId++ } + } + + fun getDotView(root: Node, label: String = "AST"): String { + val view = StringBuilder("digraph g {") + view.append("label=\"$label\"") + view.append(handleNode(root)) + view.append("}") + return view.toString() + } + + private fun getNodeLabel(node: Node): String { + val view = StringBuilder("label = \"") + when (node) { + is TerminalNode<*> -> { + view.append(node.terminal.toString()) + } + else -> { + view.append(node.javaClass.simpleName) + } + } + view.append("\noffset = ${node.offset}") + view.append("\nlength = ${node.length}") + view.append("\"") + return view.toString() + } + + private fun getNodeView(node: Node): StringBuilder { + val view = StringBuilder("\n${getId(node)} [ ${getNodeLabel(node)}") + if(node is TerminalNode<*>){ + view.append(", color = green") + } + view.append("]") + return view + } + + fun handleNode(node: Node): String { + val id = getId(node) + val view = getNodeView(node) + val left = node.left + + if (showSiblings && left != null) { + view.append("\n$id -> ${getId(left)} [color=blue]") + } + + for (child in node.children) { + view.append("\n$id -> ${getId(child)}") + view.append(handleNode(child)) + } + return view.toString() + } + + fun writeToFile(view: String, filePath: Path) { + val genPath = Path.of("gen") + Files.createDirectories(genPath) + val file = genPath.resolve(filePath).toFile() + file.writeText(view) + } + + fun writeToFile(root: Node, fileName: String, label: String = "AST") { + writeToFile(getDotView(root, label), Path.of("$fileName.dot")) + } + +} \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/ast/Node.kt b/generator/src/main/kotlin/org/ucfs/ast/Node.kt index e3ad481c5..9de51ee0f 100644 --- a/generator/src/main/kotlin/org/ucfs/ast/Node.kt +++ b/generator/src/main/kotlin/org/ucfs/ast/Node.kt @@ -5,9 +5,12 @@ package org.ucfs.ast * - sppfNode (internalNode) * - constructor (parent, sppfNode, offset) */ -abstract class Node( - var children: List, +open class Node( var parent: Node?, var offset: Int, - var length: Int -) \ No newline at end of file +){ + var length: Int = 0 + open var left: Node? = null + var right: Node? = null + var children: ArrayList = ArrayList() +} \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt b/generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt index 03848d126..9521aa1ce 100644 --- a/generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt +++ b/generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt @@ -67,8 +67,7 @@ class NodeClassesGenerator(override val grammarClazz: Class<*>) : return FunSpec.constructorBuilder() .addParameter(PARENT, superClass) .addParameter(OFFSET, Int::class) - .addParameter(LENGTH, Int::class) - .callSuperConstructor(PARENT, OFFSET, LENGTH) + .callSuperConstructor(PARENT, OFFSET) .build() } diff --git a/generator/src/main/kotlin/org/ucfs/ast/TerminalNode.kt b/generator/src/main/kotlin/org/ucfs/ast/TerminalNode.kt index 243b2f6fe..98ade18f2 100644 --- a/generator/src/main/kotlin/org/ucfs/ast/TerminalNode.kt +++ b/generator/src/main/kotlin/org/ucfs/ast/TerminalNode.kt @@ -1,4 +1,10 @@ package org.ucfs.ast -class TerminalNode(parent: Node, offset: Int, length: Int) : - Node(emptyList(), parent, offset, length) \ No newline at end of file +import org.ucfs.rsm.symbol.ITerminal + +class TerminalNode(parent: Node, offset: Int, val terminal: T?, override var left: Node?) : + Node(parent, offset) { + init { + length = terminal.toString().length + } +} \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/examples/Examples.kt b/generator/src/main/kotlin/org/ucfs/examples/Examples.kt new file mode 100644 index 000000000..daaf4b2b8 --- /dev/null +++ b/generator/src/main/kotlin/org/ucfs/examples/Examples.kt @@ -0,0 +1,29 @@ +package org.ucfs.examples + +import org.ucfs.ast.AstExtractor +import org.ucfs.ast.DotWriter +import org.ucfs.ast.NodeClassesGenerator +import org.ucfs.examples.dyck.DyckGrammar +import org.ucfs.examples.golang.SimpleGolang +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.input.LinearInput +import org.ucfs.parser.Gll +import org.ucfs.sppf.writeSppfToDot +import java.nio.file.Path + +object Examples { + fun generateAst(grammar: Grammar, pkg: String, input: String){ + val grammarClass = grammar::class.java + NodeClassesGenerator(grammarClass).generate(Path.of("generator", "src", "main", "kotlin"), pkg) + val gll = Gll.gll(grammar.rsm, LinearInput.buildFromString(input)) + val sppf = gll.parse().first + val ast = AstExtractor(pkg).extract(sppf) + DotWriter().writeToFile(ast, "${grammarClass.simpleName}_ast", "${grammarClass.simpleName} AST") + writeSppfToDot(sppf!!, Path.of("${grammarClass.simpleName}_sppf.dot").toString()) + } +} + +fun main(){ + Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", "r 1 + 1 ;") + Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ( ) ] ") +} \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/examples/AstGeneratorExample.kt b/generator/src/main/kotlin/org/ucfs/examples/dyck/DyckGrammar.kt similarity index 63% rename from generator/src/main/kotlin/org/ucfs/examples/AstGeneratorExample.kt rename to generator/src/main/kotlin/org/ucfs/examples/dyck/DyckGrammar.kt index 3c94b32ba..409b10ff8 100644 --- a/generator/src/main/kotlin/org/ucfs/examples/AstGeneratorExample.kt +++ b/generator/src/main/kotlin/org/ucfs/examples/dyck/DyckGrammar.kt @@ -1,26 +1,23 @@ -package org.ucfs.examples +package org.ucfs.examples.dyck -import org.ucfs.ast.NodeClassesGenerator import org.ucfs.grammar.combinator.Grammar import org.ucfs.grammar.combinator.extension.StringExtension.times import org.ucfs.grammar.combinator.regexp.Epsilon +import org.ucfs.grammar.combinator.regexp.times import org.ucfs.grammar.combinator.regexp.Nt import org.ucfs.grammar.combinator.regexp.or -import java.nio.file.Path -class DyckGrammar: Grammar(){ +class DyckGrammar : Grammar() { var S by Nt() var Round by Nt() var Quadrat by Nt() var Curly by Nt() - init{ + + init { setStart(S) - S = Round or Quadrat or Epsilon + S = (Round or Quadrat or Curly or Epsilon) Round = "(" * S * ")" Quadrat = "[" * S * "]" Curly = "{" * S * "}" } } -fun main(){ - NodeClassesGenerator(DyckGrammar::class.java).generate(Path.of("gen"), "") -} \ No newline at end of file diff --git a/generator/src/main/kotlin/org/ucfs/examples/golang/SimpleGolang.kt b/generator/src/main/kotlin/org/ucfs/examples/golang/SimpleGolang.kt new file mode 100644 index 000000000..e140bdbd5 --- /dev/null +++ b/generator/src/main/kotlin/org/ucfs/examples/golang/SimpleGolang.kt @@ -0,0 +1,32 @@ +package org.ucfs.examples.golang + +import org.ucfs.ast.AstExtractor +import org.ucfs.ast.DotWriter +import org.ucfs.ast.NodeClassesGenerator +import org.ucfs.grammar.combinator.Grammar +import org.ucfs.grammar.combinator.extension.StringExtension.or +import org.ucfs.grammar.combinator.extension.StringExtension.times +import org.ucfs.grammar.combinator.regexp.Many +import org.ucfs.grammar.combinator.regexp.Nt +import org.ucfs.grammar.combinator.regexp.or +import org.ucfs.input.LinearInput +import org.ucfs.input.LinearInputLabel +import org.ucfs.parser.Gll +import org.ucfs.sppf.writeSppfToDot +import java.nio.file.Path +import javax.sound.sampled.Line + +class SimpleGolang : Grammar() { + var Program by Nt() + var Block by Nt() + var Statement by Nt() + var IntExpr by Nt() + + init { + setStart(Program) + Program = Block + Block = Many(Statement) + Statement = IntExpr * ";" or "r" * IntExpr * ";" + IntExpr = "1" or "1" * "+" * "1" + } +} diff --git a/generator/src/main/kotlin/org/ucfs/parser/IParserGenerator.kt b/generator/src/main/kotlin/org/ucfs/parser/IParserGenerator.kt index 2531a1dee..9a0a87e9a 100644 --- a/generator/src/main/kotlin/org/ucfs/parser/IParserGenerator.kt +++ b/generator/src/main/kotlin/org/ucfs/parser/IParserGenerator.kt @@ -8,6 +8,7 @@ import org.ucfs.descriptors.Descriptor import org.ucfs.grammar.combinator.Grammar import org.ucfs.grammar.combinator.regexp.Nt import org.ucfs.input.ILabel +import org.ucfs.nullable import org.ucfs.parser.context.IContext import org.ucfs.rsm.RsmState import org.ucfs.rsm.symbol.ITerminal @@ -29,7 +30,7 @@ interface IParserGenerator : IGeneratorFromGrammar { const val GRAMMAR_NAME = "grammar" const val FUNCS_NAME = "ntFuncs" val descriptorType = Descriptor::class.asTypeName().parameterizedBy(vertexType) - val sppfType = SppfNode::class.asTypeName().parameterizedBy(vertexType).copy(true) + val sppfType = SppfNode::class.asTypeName().parameterizedBy(vertexType).nullable() const val DESCRIPTOR = "descriptor" const val SPPF_NODE = "curSppfNode" const val RSM_FIELD = "rsmState" diff --git a/solver/src/main/kotlin/org/ucfs/grammar/combinator/extension/StringExtension.kt b/solver/src/main/kotlin/org/ucfs/grammar/combinator/extension/StringExtension.kt index 11dd26226..738cfc41a 100644 --- a/solver/src/main/kotlin/org/ucfs/grammar/combinator/extension/StringExtension.kt +++ b/solver/src/main/kotlin/org/ucfs/grammar/combinator/extension/StringExtension.kt @@ -23,4 +23,6 @@ object StringExtension { fun many(some: String): Regexp { return many(Term(some)) } + fun Option(exp: String) = Alternative.makeAlternative(Epsilon, Term(exp)) + } \ No newline at end of file diff --git a/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Alternative.kt b/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Alternative.kt index b984bc67b..0b772977f 100644 --- a/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Alternative.kt +++ b/solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Alternative.kt @@ -29,10 +29,5 @@ data class Alternative( } infix fun Regexp.or(other: Regexp): Regexp = Alternative.makeAlternative(left = this, other) -infix fun String.or(other: Regexp): Regexp = Alternative.makeAlternative(left = Term(this), other) -infix fun Regexp.or(other: String): Regexp = Alternative.makeAlternative(left = this, Term(other)) -infix fun String.or(other: String): Regexp = Alternative.makeAlternative(left = Term(this), Term(other)) - fun Option(exp: Regexp) = Alternative.makeAlternative(Epsilon, exp) -fun Option(exp: String) = Alternative.makeAlternative(Epsilon, Term(exp)) \ No newline at end of file