forked from vadyushkins/kotgll
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add ast extraction and writing in dot
- Loading branch information
Showing
12 changed files
with
217 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,66 @@ | ||
package org.ucfs.ast | ||
|
||
import org.ucfs.sppf.node.ISppfNode | ||
import org.ucfs.sppf.node.IntermediateSppfNode | ||
import org.ucfs.sppf.node.PackedSppfNode | ||
import org.ucfs.sppf.node.SymbolSppfNode | ||
import org.ucfs.GeneratorException | ||
import org.ucfs.rsm.symbol.Nonterminal | ||
import org.ucfs.sppf.node.* | ||
|
||
/**
 * Converts an SPPF into a tree of [Node]s.
 *
 * Nonterminal nodes are instantiated reflectively: the class name returned by
 * [NodeClassesGenerator.getClassName] is looked up in package [pkg], and the
 * class must expose a public `(Node, Int)` constructor that receives the
 * parent node and the offset.
 *
 * @param pkg package that contains the node classes; may be empty, in which
 * case the bare class name is looked up
 */
class AstExtractor<TerminalType>(val pkg: String) {
    /** Node classes already resolved by [getNodeClass], keyed by nonterminal. */
    val nonterminalToClass = HashMap<Nonterminal, Class<*>>()

    /**
     * Extracts the AST for [sppf].
     *
     * @return the first node created under a synthetic root, or the synthetic
     * (childless) root itself when nothing was extracted, e.g. for `null`
     * @throws GeneratorException if an SPPF node of an unsupported type is met
     */
    fun extract(sppf: ISppfNode?): Node {
        val root = Node(null, 0)
        extract(sppf, root, null)
        return root.children.firstOrNull() ?: root
    }

    /** Offset right after [left], or the offset of [parent] when there is no left sibling. */
    private fun getOffset(left: Node?, parent: Node): Int =
        if (left == null) parent.offset else left.offset + left.length

    /**
     * Appends the nodes for [sppf] to [parent], to the right of [left].
     *
     * @return the rightmost node known after processing the subtree; this is
     * [left] itself when the subtree contributed no nodes, so that the next
     * sibling still gets the correct left neighbour and offset
     */
    private fun extract(sppf: ISppfNode?, parent: Node, left: Node?): Node? {
        return when (sppf) {
            // Nothing to add: the rightmost node so far stays the same.
            null -> left

            is PackedSppfNode<*> -> {
                val newLeft = extract(sppf.leftSppfNode, parent, left)
                extract(sppf.rightSppfNode, parent, newLeft)
            }

            // Only the first child is followed; other children are ignored.
            is IntermediateSppfNode<*> -> extract(sppf.children.firstOrNull(), parent, left)

            is SymbolSppfNode<*> -> {
                val ctor = getNodeClass(sppf.symbol).getConstructor(Node::class.java, Int::class.java)
                val node = ctor.newInstance(parent, getOffset(left, parent)) as Node
                node.left = left
                parent.children.add(node)
                // Only the first packed child is followed; a symbol node
                // without children yields an empty node instead of throwing.
                extract(sppf.children.firstOrNull(), node, null)
                parent.length += node.length
                node
            }

            is TerminalSppfNode<*> -> {
                val node = TerminalNode(parent, getOffset(left, parent), sppf.terminal, left)
                parent.children.add(node)
                // Reuse the length computed by the node itself so both stay in sync.
                parent.length += node.length
                node
            }

            else -> throw GeneratorException("Unknown sppf node type : $sppf")
        }
    }

    /**
     * Resolves (and caches) the node class for [nt].
     *
     * @throws ClassNotFoundException if no such class is on the classpath
     */
    private fun getNodeClass(nt: Nonterminal): Class<*> =
        nonterminalToClass.getOrPut(nt) {
            // An empty package must not produce a name with a leading dot.
            val prefix = if (pkg.isEmpty()) "" else "$pkg."
            Class.forName("$prefix${NodeClassesGenerator.getClassName(nt)}")
        }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package org.ucfs.ast | ||
|
||
import java.nio.file.Files | ||
import java.nio.file.Path | ||
|
||
/**
 * Renders an AST rooted at a [Node] as a Graphviz DOT digraph and writes it to disk.
 *
 * Identifiers are assigned per writer instance and are kept across calls.
 *
 * @param showSiblings when `true`, a blue edge is drawn from every node to its left sibling
 */
class DotWriter(private val showSiblings: Boolean = false) {
    private var lastId = 0

    /** DOT identifiers handed out so far, keyed by node. */
    val ids: HashMap<Node, Int> = HashMap()

    /** Returns the identifier of [node], assigning the next free one on first use. */
    fun getId(node: Node): Int = ids.getOrPut(node) { lastId++ }

    /**
     * Builds the complete DOT document for the tree rooted at [root].
     *
     * @param label graph title; quotes and backslashes are escaped
     */
    fun getDotView(root: Node, label: String = "AST"): String = buildString {
        append("digraph g {")
        append("label=\"${escape(label)}\"")
        appendNode(root, this)
        append("}")
    }

    /** Escapes backslashes and double quotes so [text] is safe inside a DOT quoted string. */
    private fun escape(text: String): String =
        text.replace("\\", "\\\\").replace("\"", "\\\"")

    /** Label attribute: terminal text or node class name, followed by offset and length. */
    private fun getNodeLabel(node: Node): String {
        val title = when (node) {
            is TerminalNode<*> -> node.terminal.toString()
            else -> node.javaClass.simpleName
        }
        return "label = \"${escape(title)}\noffset = ${node.offset}\nlength = ${node.length}\""
    }

    /** Node statement for [node]; terminal nodes are drawn in green. */
    private fun getNodeView(node: Node): String {
        val color = if (node is TerminalNode<*>) ", color = green" else ""
        return "\n${getId(node)} [ ${getNodeLabel(node)}$color]"
    }

    /** Appends the statements for [node] and its whole subtree to [out]. */
    private fun appendNode(node: Node, out: StringBuilder) {
        val id = getId(node)
        out.append(getNodeView(node))

        val left = node.left
        if (showSiblings && left != null) {
            out.append("\n$id -> ${getId(left)} [color=blue]")
        }

        for (child in node.children) {
            out.append("\n$id -> ${getId(child)}")
            appendNode(child, out)
        }
    }

    /** Returns the DOT statements (node, edges, descendants) for the subtree rooted at [node]. */
    fun handleNode(node: Node): String = buildString { appendNode(node, this) }

    /**
     * Writes [view] to [filePath], resolved against the `gen` directory.
     * Missing directories on the way to the file are created.
     */
    fun writeToFile(view: String, filePath: Path) {
        val target = Path.of("gen").resolve(filePath)
        // Covers "gen" itself as well as any sub-directories named in filePath.
        target.toAbsolutePath().parent?.let { Files.createDirectories(it) }
        target.toFile().writeText(view)
    }

    /** Renders [root] with [label] and writes it to `gen/<fileName>.dot`. */
    fun writeToFile(root: Node, fileName: String, label: String = "AST") {
        writeToFile(getDotView(root, label), Path.of("$fileName.dot"))
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
package org.ucfs.ast | ||
|
||
class TerminalNode<T>(parent: Node, offset: Int, length: Int) : | ||
Node(emptyList(), parent, offset, length) | ||
import org.ucfs.rsm.symbol.ITerminal | ||
|
||
/**
 * Leaf AST node that wraps a single terminal.
 *
 * The node length is the length of the terminal's string form.
 * NOTE(review): a `null` terminal is rendered as the text "null", so such a
 * node gets length 4 — confirm this is intended.
 *
 * @param parent node this leaf is attached to
 * @param offset start offset of the leaf
 * @property terminal the wrapped terminal, possibly absent
 * @property left left sibling of this leaf, if any
 */
class TerminalNode<T : ITerminal>(
    parent: Node,
    offset: Int,
    val terminal: T?,
    override var left: Node?
) : Node(parent, offset) {
    init {
        length = "$terminal".length
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package org.ucfs.examples | ||
|
||
import org.ucfs.ast.AstExtractor | ||
import org.ucfs.ast.DotWriter | ||
import org.ucfs.ast.NodeClassesGenerator | ||
import org.ucfs.examples.dyck.DyckGrammar | ||
import org.ucfs.examples.golang.SimpleGolang | ||
import org.ucfs.grammar.combinator.Grammar | ||
import org.ucfs.input.LinearInput | ||
import org.ucfs.parser.Gll | ||
import org.ucfs.sppf.writeSppfToDot | ||
import java.nio.file.Path | ||
|
||
object Examples {
    /**
     * Generates node classes for [grammar], parses [input] and dumps both the
     * extracted AST and the SPPF as DOT files named after the grammar class.
     *
     * NOTE(review): node class sources are generated in this very run while
     * the AST extractor loads the classes by name — presumably they must
     * already be compiled from an earlier run; confirm.
     *
     * @param grammar grammar to parse with
     * @param pkg package of the generated node classes
     * @param input text to parse
     * @throws IllegalStateException if the parser returns no SPPF for [input]
     */
    fun generateAst(grammar: Grammar, pkg: String, input: String) {
        val grammarClass = grammar::class.java
        val grammarName = grammarClass.simpleName
        NodeClassesGenerator(grammarClass).generate(Path.of("generator", "src", "main", "kotlin"), pkg)

        val gll = Gll.gll(grammar.rsm, LinearInput.buildFromString(input))
        // Fail before anything is written instead of emitting an empty AST
        // file and then crashing on a bare null assertion.
        val sppf = checkNotNull(gll.parse().first) {
            "$grammarName: parser returned no SPPF for input \"$input\""
        }

        val ast = AstExtractor<String>(pkg).extract(sppf)
        DotWriter().writeToFile(ast, "${grammarName}_ast", "$grammarName AST")
        writeSppfToDot(sppf, "${grammarName}_sppf.dot")
    }
}
|
||
/** Runs [Examples.generateAst] for the SimpleGolang and the Dyck sample grammars, in that order. */
fun main() {
    Examples.generateAst(
        grammar = SimpleGolang(),
        pkg = "org.ucfs.examples.golang",
        input = "r 1 + 1 ;"
    )
    Examples.generateAst(
        grammar = DyckGrammar(),
        pkg = "org.ucfs.examples.dyck",
        input = "[ ( ) ] "
    )
}
15 changes: 6 additions & 9 deletions
15
.../org/ucfs/examples/AstGeneratorExample.kt → ...lin/org/ucfs/examples/dyck/DyckGrammar.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,23 @@ | ||
package org.ucfs.examples | ||
package org.ucfs.examples.dyck | ||
|
||
import org.ucfs.ast.NodeClassesGenerator | ||
import org.ucfs.grammar.combinator.Grammar | ||
import org.ucfs.grammar.combinator.extension.StringExtension.times | ||
import org.ucfs.grammar.combinator.regexp.Epsilon | ||
import org.ucfs.grammar.combinator.regexp.times | ||
import org.ucfs.grammar.combinator.regexp.Nt | ||
import org.ucfs.grammar.combinator.regexp.or | ||
import java.nio.file.Path | ||
|
||
/**
 * Grammar of nested bracket pairs.
 *
 * The start nonterminal [S] is either empty or a single pair of round, square
 * or curly brackets wrapped around another [S].
 */
class DyckGrammar : Grammar() {
    var S by Nt()
    var Round by Nt()
    var Quadrat by Nt()
    var Curly by Nt()

    init {
        setStart(S)
        S = (Round or Quadrat or Curly or Epsilon)
        Round = "(" * S * ")"
        Quadrat = "[" * S * "]"
        Curly = "{" * S * "}"
    }
}
fun main(){ | ||
NodeClassesGenerator(DyckGrammar::class.java).generate(Path.of("gen"), "") | ||
} |
32 changes: 32 additions & 0 deletions
32
generator/src/main/kotlin/org/ucfs/examples/golang/SimpleGolang.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package org.ucfs.examples.golang | ||
|
||
import org.ucfs.ast.AstExtractor | ||
import org.ucfs.ast.DotWriter | ||
import org.ucfs.ast.NodeClassesGenerator | ||
import org.ucfs.grammar.combinator.Grammar | ||
import org.ucfs.grammar.combinator.extension.StringExtension.or | ||
import org.ucfs.grammar.combinator.extension.StringExtension.times | ||
import org.ucfs.grammar.combinator.regexp.Many | ||
import org.ucfs.grammar.combinator.regexp.Nt | ||
import org.ucfs.grammar.combinator.regexp.or | ||
import org.ucfs.input.LinearInput | ||
import org.ucfs.input.LinearInputLabel | ||
import org.ucfs.parser.Gll | ||
import org.ucfs.sppf.writeSppfToDot | ||
import java.nio.file.Path | ||
import javax.sound.sampled.Line | ||
|
||
/**
 * Small example grammar.
 *
 * - `Program` is a `Block`
 * - `Block` is a `Many` repetition of `Statement`
 * - `Statement` is `IntExpr ;` or `r IntExpr ;`
 * - `IntExpr` is `1` or `1 + 1`
 */
class SimpleGolang : Grammar() {
    var Program by Nt()
    var Block by Nt()
    var Statement by Nt()
    var IntExpr by Nt()

    init {
        setStart(Program)
        Program = Block
        Block = Many(Statement)
        // `*` binds tighter than the infix `or`; the grouping is spelled out for readability.
        Statement = (IntExpr * ";") or ("r" * IntExpr * ";")
        IntExpr = "1" or ("1" * "+" * "1")
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters