forked from vadyushkins/kotgll
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add ast extraction and ast node generation
- Loading branch information
Showing
22 changed files
with
469 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,3 +85,6 @@ bin/ | |
|
||
### Mac OS ### | ||
.DS_Store | ||
|
||
### Generated files ### | ||
/gen/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package org.ucfs | ||
|
||
/**
 * Exception raised by the generator and AST tooling.
 *
 * The message is `"Generator exception"` when [msg] is empty and
 * `"Generator exception: <msg>"` otherwise, so the prefix and the details never run together.
 */
class GeneratorException(msg: String = "") :
    Exception(if (msg.isEmpty()) "Generator exception" else "Generator exception: $msg") {
    companion object {
        const val GRAMMAR_EXPECTED = "Only subclass of Grammar class can be used for parser generation"
    }
}
47 changes: 47 additions & 0 deletions
47
generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package org.ucfs | ||
|
||
import com.squareup.kotlinpoet.AnnotationSpec | ||
import com.squareup.kotlinpoet.ClassName | ||
import com.squareup.kotlinpoet.FileSpec | ||
import com.squareup.kotlinpoet.TypeName | ||
import org.ucfs.grammar.combinator.Grammar | ||
import java.nio.file.Path | ||
|
||
/**
 * Shared behaviour for generators that are driven by a [Grammar] class.
 */
interface IGeneratorFromGrammar {
    /** Class of the grammar this generator works with. */
    val grammarClazz: Class<*>

    /**
     * Creates a grammar object from the given class using its public no-argument constructor.
     *
     * The `rsm` property of the new instance is read once before the instance is returned.
     *
     * @param grammarClazz class that must be assignable to [Grammar]
     * @return the freshly created grammar
     * @throws GeneratorException if [grammarClazz] is not a [Grammar] class
     */
    fun buildGrammar(grammarClazz: Class<*>): Grammar {
        val isGrammarClass = Grammar::class.java.isAssignableFrom(grammarClazz)
        if (!isGrammarClass) {
            throw GeneratorException(GeneratorException.GRAMMAR_EXPECTED)
        }
        val instance = grammarClazz.getConstructor().newInstance() as? Grammar
            ?: throw GeneratorException(GeneratorException.GRAMMAR_EXPECTED)
        // NOTE(review): presumably reading rsm forces the grammar's RSM to be built up front — confirm.
        instance.rsm
        return instance
    }

    /**
     * Runs the generator.
     *
     * NOTE(review): [location] looks like the output source root and [pkg] the package of the
     * generated code — confirm against the implementers.
     */
    fun generate(location: Path, pkg: String)
}
|
||
/**
 * Adds a `Suppress` annotation that lists every name in [types] to this file builder.
 * Nothing is added when [types] is empty.
 */
internal fun FileSpec.Builder.suppressWarningTypes(vararg types: String) {
    if (types.isNotEmpty()) {
        // One %S placeholder per warning name, comma separated and without a trailing comma.
        val placeholders = types.joinToString(separator = ",") { "%S" }
        val suppress = AnnotationSpec.builder(ClassName("", "Suppress"))
            .addMember(placeholders, *types)
            .build()
        addAnnotation(suppress)
    }
}
|
||
/** Returns a copy of this type name with its nullable flag set. */
fun TypeName.nullable(): TypeName = copy(nullable = true)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package org.ucfs.ast | ||
|
||
import org.ucfs.GeneratorException | ||
import org.ucfs.rsm.symbol.Nonterminal | ||
import org.ucfs.sppf.node.* | ||
|
||
/**
 * Builds a tree of [Node]s from an SPPF.
 *
 * Node classes for nonterminals are loaded reflectively from package [pkg], using the class names
 * produced by [NodeClassesGenerator.getClassName].
 */
class AstExtractor(val pkg: String) {
    /** Cache of node classes already resolved for a nonterminal. */
    val nonterminalToClass = HashMap<Nonterminal, Class<*>>()

    /**
     * Packed nodes already consumed by the extraction in progress.
     * Needed to handle "many" in rules (many can make cycles in sppf).
     */
    val used = HashSet<PackedSppfNode<*>>()

    /**
     * Extracts an AST from [sppf].
     *
     * @return the first node created under a synthetic root, or that (childless) root itself when
     * nothing was extracted
     */
    fun extract(sppf: ISppfNode?): Node {
        // Forget packed nodes consumed by an earlier call: otherwise extracting the same SPPF twice
        // with one extractor finds no unused packed node and fails.
        used.clear()
        val root = Node(null, 0)
        extract(sppf, root, null)
        return root.children.firstOrNull() ?: root
    }

    /**
     * Offset for a new child of [parent]: right after [left] when there is a left sibling,
     * otherwise the offset of [parent] itself.
     */
    private fun getOffset(left: Node?, parent: Node): Int =
        left?.let { it.offset + it.length } ?: parent.offset

    /**
     * Walks [sppf], attaching created nodes to [parent]; [left] is the node created just before.
     *
     * @return the rightmost node created for this subtree, or `null` if none was created
     * @throws GeneratorException for an unsupported SPPF node type
     */
    private fun extract(sppf: ISppfNode?, parent: Node, left: Node?): Node? = when (sppf) {
        null -> null

        is PackedSppfNode<*> -> {
            // The left part goes first so the right part sees its rightmost node as left sibling.
            val newLeft = extract(sppf.leftSppfNode, parent, left)
            extract(sppf.rightSppfNode, parent, newLeft)
        }

        is IntermediateSppfNode<*> -> extract(sppf.children.firstOrNull(), parent, left)

        is SymbolSppfNode<*> -> {
            val nodeClass = getNodeClass(sppf.symbol)
            val ctor = nodeClass.getConstructor(Node::class.java, Int::class.java)

            val node: Node = ctor.newInstance(parent, getOffset(left, parent)) as Node
            node.left = left
            parent.children.add(node)

            // Take the first packed child not consumed yet; `first` throws NoSuchElementException
            // when every packed child has already been used.
            val packedNode: PackedSppfNode<*> = sppf.children.first { pn -> !used.contains(pn) }
            used.add(packedNode)

            extract(packedNode, node, null)
            parent.length += node.length
            node
        }

        is TerminalSppfNode<*> -> {
            val node = TerminalNode(parent, getOffset(left, parent), sppf.terminal, left)
            parent.children.add(node)
            // Same rule as for nonterminals: the parent grows by the child's own length.
            parent.length += node.length
            node
        }

        else -> throw GeneratorException("Unknown sppf node type : $sppf")
    }

    /** Resolves (and caches) the node class generated for [nt] in package [pkg]. */
    private fun getNodeClass(nt: Nonterminal): Class<*> =
        nonterminalToClass.getOrPut(nt) {
            Class.forName("$pkg.${NodeClassesGenerator.getClassName(nt)}")
        }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package org.ucfs.ast | ||
|
||
import java.nio.file.Files | ||
import java.nio.file.Path | ||
|
||
/**
 * Renders a tree of [Node]s as a Graphviz DOT digraph and writes it to disk.
 *
 * Node ids are assigned on first use and kept for the lifetime of the writer.
 */
class DotWriter {
    private var lastId = 0

    /** When `true`, a blue edge from each node to its [Node.left] sibling is emitted. */
    var showSiblings = true

    /** Ids already assigned to nodes. */
    val ids: HashMap<Node, Int> = HashMap()

    /** Returns the id of [node], assigning the next free id on first request. */
    fun getId(node: Node): Int = ids.getOrPut(node) { lastId++ }

    /**
     * Builds the DOT text for the tree rooted at [root].
     *
     * @param label graph label; double quotes in it are escaped
     */
    fun getDotView(root: Node, label: String = "AST"): String = buildString {
        append("digraph g {")
        append("label=\"${escapeQuotes(label)}\"")
        append(handleNode(root))
        append("}")
    }

    /**
     * Builds the `label = "..."` attribute: the terminal text for terminal nodes, the simple class
     * name otherwise, followed by offset and length on separate lines.
     */
    private fun getNodeLabel(node: Node): String {
        val title = when (node) {
            is TerminalNode<*> -> node.terminal.toString()
            else -> node.javaClass.simpleName
        }
        // Escape quotes so a terminal such as `"` cannot terminate the DOT string early.
        return "label = \"${escapeQuotes(title)}\noffset = ${node.offset}\nlength = ${node.length}\""
    }

    /** Builds the node statement for [node]; terminal nodes are coloured green. */
    private fun getNodeView(node: Node): StringBuilder {
        val view = StringBuilder("\n${getId(node)} [ ${getNodeLabel(node)}")
        if (node is TerminalNode<*>) {
            view.append(", color = green")
        }
        return view.append("]")
    }

    /** Returns the DOT statements for [node] and its whole subtree. */
    fun handleNode(node: Node): String = StringBuilder().also { appendNode(node, it) }.toString()

    /** Appends the statements for [node] and its subtree to [out] without intermediate strings. */
    private fun appendNode(node: Node, out: StringBuilder) {
        val id = getId(node)
        out.append(getNodeView(node))

        val left = node.left
        if (showSiblings && left != null) {
            out.append("\n$id -> ${getId(left)} [color=blue]")
        }

        for (child in node.children) {
            out.append("\n$id -> ${getId(child)}")
            appendNode(child, out)
        }
    }

    /**
     * Writes [view] to [filePath] resolved against the `gen/ast` directory, creating missing
     * directories first.
     */
    fun writeToFile(view: String, filePath: Path) {
        val genPath = Path.of("gen", "ast")
        Files.createDirectories(genPath)
        val target = genPath.resolve(filePath)
        // filePath may itself contain directories; make sure they exist before writing.
        target.parent?.let { Files.createDirectories(it) }
        target.toFile().writeText(view)
    }

    /**
     * Renders [root] and writes it to `gen/ast/<fileName>.dot`.
     *
     * Also stores [showSiblings] on this writer.
     */
    fun writeToFile(root: Node, fileName: String, label: String = "AST", showSiblings: Boolean) {
        this.showSiblings = showSiblings
        writeToFile(getDotView(root, label), Path.of("$fileName.dot"))
    }

    /** Escapes double quotes for use inside a DOT quoted string. */
    private fun escapeQuotes(text: String): String = text.replace("\"", "\\\"")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package org.ucfs.ast | ||
|
||
/**
 * Base class of AST nodes.
 *
 * TODO add methods below
 * - sppfNode (internalNode)
 * - constructor (parent, sppfNode, offset)
 *
 * @property parent node this one is attached to; `null` for a node without a parent
 * @property offset offset of the node (NOTE(review): units are not evident from this class — confirm)
 */
open class Node(
    var parent: Node?,
    var offset: Int,
) {
    /** Length covered by this node; starts at 0. */
    var length = 0

    /** Left neighbour, if any. */
    open var left: Node? = null

    /** Right neighbour, if any. */
    var right: Node? = null

    /** Child nodes in insertion order. */
    var children = ArrayList<Node>()
}
94 changes: 94 additions & 0 deletions
94
generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
package org.ucfs.ast | ||
|
||
import com.squareup.kotlinpoet.* | ||
import com.squareup.kotlinpoet.ParameterizedTypeName.Companion.parameterizedBy | ||
import org.ucfs.IGeneratorFromGrammar | ||
import org.ucfs.grammar.combinator.Grammar | ||
import org.ucfs.grammar.combinator.regexp.* | ||
import org.ucfs.rsm.symbol.Nonterminal | ||
import org.ucfs.suppressWarningTypes | ||
import java.nio.file.Path | ||
|
||
/**
 * Generates one AST node class file per nonterminal of a grammar.
 */
class NodeClassesGenerator(override val grammarClazz: Class<*>) : IGeneratorFromGrammar {
    /** Grammar instance built from [grammarClazz]. */
    val grammar: Grammar = buildGrammar(grammarClazz)

    /** Super class of every generated node class. */
    private val superClass: Class<*> = Node::class.java

    /**
     * Writes a class file for each nonterminal of [grammar] under [location], in package [pkg].
     */
    override fun generate(location: Path, pkg: String) {
        grammar.nonTerms.forEach { nt -> generateClassFile(nt, pkg).writeTo(location) }
    }

    /**
     * Builds the file holding the node class for [nt].
     */
    private fun generateClassFile(nt: Nt, pkg: String): FileSpec {
        val simpleName = ClassName(pkg, getClassName(nt)).parameterizedBy(terminalType).rawType.simpleName
        val nodeClass = TypeSpec.classBuilder(simpleName)
            .addTypeVariable(terminalType)
            .superclass(superClass.asTypeName())
            .addFunction(generateConstructor())
            .build()

        return FileSpec.builder(pkg, simpleName)
            .addType(nodeClass)
            .apply { suppressWarningTypes("RedundantVisibilityModifier") }
            .build()
    }

    /**
     * Builds the `(parent, offset)` constructor that forwards both arguments to the super class.
     */
    private fun generateConstructor(): FunSpec =
        FunSpec.constructorBuilder()
            .addParameter(PARENT, superClass)
            .addParameter(OFFSET, Int::class)
            .callSuperConstructor(PARENT, OFFSET)
            .build()

    /**
     * Collects a property for every derived symbol in [re]. Symbols below an [Alternative] or a
     * [Many] are treated as optional; inside a [Concat] the caller's [isOptional] is passed on.
     */
    private fun extractChildren(re: Regexp, isOptional: Boolean): List<PropertySpec> {
        return when (re) {
            is Alternative -> extractChildren(re.left, true) + extractChildren(re.right, true)
            is Concat -> extractChildren(re.head, isOptional) + extractChildren(re.tail, isOptional)
            is Empty -> listOf()
            is Epsilon -> listOf()
            is Many -> extractChildren(re.exp, true)
            is DerivedSymbol -> listOf(generateProperty(re, isOptional))
        }
    }

    /** Not implemented yet. */
    private fun <T> generateProperty(value: T, isOptional: Boolean): PropertySpec = TODO()

    companion object {
        /** Name of the node class generated for [nt]. */
        fun getClassName(nt: Nt): String = getClassName(nt.nonterm)

        /** Name of the node class generated for [nt]: the nonterminal name followed by `Node`. */
        fun getClassName(nt: Nonterminal): String = "${nt.name}Node"

        //TODO add extensions `TerminalType: ITerminal`
        val terminalType = TypeVariableName("TerminalType")
        const val FUN_GET_CHILDREN = "getChildren"
        const val OFFSET = "offset"
        const val PARENT = "parent"
        const val LENGTH = "length"
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package org.ucfs.ast | ||
|
||
import org.ucfs.rsm.symbol.ITerminal | ||
|
||
/**
 * AST node that wraps a terminal.
 *
 * Its length is the length of the terminal's string form, or 0 when there is no terminal.
 * NOTE(review): this assumes `toString()` of the terminal matches the matched text — confirm.
 *
 * @property terminal wrapped terminal; may be `null`
 * @property left node to the left of this one, if any
 */
class TerminalNode<T : ITerminal>(parent: Node, offset: Int, val terminal: T?, override var left: Node?) :
    Node(parent, offset) {
    init {
        // Calling toString() directly on a null terminal yields "null" and a bogus length of 4;
        // an absent terminal must occupy no input.
        length = terminal?.toString()?.length ?: 0
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package org.ucfs.examples | ||
|
||
import org.ucfs.ast.AstExtractor | ||
import org.ucfs.ast.DotWriter | ||
import org.ucfs.ast.NodeClassesGenerator | ||
import org.ucfs.examples.dyck.DyckGrammar | ||
import org.ucfs.examples.golang.SimpleGolang | ||
import org.ucfs.grammar.combinator.Grammar | ||
import org.ucfs.input.LinearInput | ||
import org.ucfs.parser.Gll | ||
import org.ucfs.rsm.writeRsmToDot | ||
import org.ucfs.sppf.writeSppfToDot | ||
import java.nio.file.Path | ||
|
||
|
||
/**
 * Example drivers for AST generation.
 */
object Examples {
    /**
     * Generates node classes for [grammar] into package [pkg], parses [input], and dumps the SPPF
     * and the extracted AST (without and with sibling edges) as DOT files named after [name].
     *
     * @throws IllegalStateException if parsing yields no SPPF
     */
    fun generateAst(grammar: Grammar, pkg: String, input: String, name: String) {
        val grammarClass = grammar::class.java
        NodeClassesGenerator(grammarClass).generate(Path.of("generator", "src", "main", "kotlin"), pkg)

        val gll = Gll.gll(grammar.rsm, LinearInput.buildFromString(input))
        // Fail with context instead of a bare NullPointerException when the input is not parsed.
        val sppf = checkNotNull(gll.parse().first) {
            "${grammarClass.simpleName} produced no SPPF for input: $input"
        }
        writeSppfToDot(sppf, Path.of("${name}.dot").toString(), "${grammarClass.simpleName} SPPF for $input")

        val ast = AstExtractor(pkg).extract(sppf)
        val label = "${grammarClass.simpleName} AST for $input"
        DotWriter().writeToFile(ast, name, label, showSiblings = false)
        DotWriter().writeToFile(ast, "$name with siblings", label, showSiblings = true)
    }
}
|
||
|
||
/**
 * Runs the examples: dumps the Dyck grammar RSM and generates ASTs for several sample inputs.
 *
 * Every run uses its own output name so that a later example does not overwrite the files
 * written by an earlier one.
 */
fun main() {
    writeRsmToDot(DyckGrammar().rsm, "rsm.dot")
    Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", "r 1 + 1 ;", "simple golang")
    Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", "r 1 + 1 ; 1 ; r 1 ;", "2_simple golang")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ( ) ] ", "1_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ( ) ] { }", "2_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ] { } [ ( ) ]", "3_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", " [ { } ( ) ] ", "4_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ] { { } ( ) } [ ( ) ]", "5_dyck")
}
Oops, something went wrong.