Skip to content

Add API for any dot input with strings on edges #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions benchmarks/src/main/kotlin/Benchmarks.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ fun getResultPath(
}


fun getTokenStream(input: String): LinearInput<Int, LinearInputLabel> {
val graph = LinearInput<Int, LinearInputLabel>()
fun getTokenStream(input: String): LinearInput<Int, TerminalInputLabel> {
val graph = LinearInput<Int, TerminalInputLabel>()
getTokenStream(input, graph)
return graph
}



fun <G : IInputGraph<Int, LinearInputLabel>> getTokenStream(input: String, inputGraph: G): G {
fun <G : IInputGraph<Int, TerminalInputLabel>> getTokenStream(input: String, inputGraph: G): G {
val lexer = Scanner(StringReader(input))
var token: JavaToken
var vertexId = 1
Expand All @@ -34,21 +34,21 @@ fun <G : IInputGraph<Int, LinearInputLabel>> getTokenStream(input: String, input
while (true) {
token = lexer.yylex() as JavaToken
if (token == JavaToken.EOF) break
inputGraph.addEdge(vertexId, LinearInputLabel(token), ++vertexId)
inputGraph.addEdge(vertexId, TerminalInputLabel(token), ++vertexId)
}

return inputGraph
}

fun getCharStream(input: String): LinearInput<Int, LinearInputLabel> {
val inputGraph = LinearInput<Int, LinearInputLabel>()
fun getCharStream(input: String): LinearInput<Int, TerminalInputLabel> {
val inputGraph = LinearInput<Int, TerminalInputLabel>()
var vertexId = 1

inputGraph.addVertex(vertexId)
inputGraph.addStartVertex(vertexId)

for (ch in input) {
inputGraph.addEdge(vertexId, LinearInputLabel(Term(ch.toString())), ++vertexId)
inputGraph.addEdge(vertexId, TerminalInputLabel(Term(ch.toString())), ++vertexId)
inputGraph.addVertex(vertexId)
}

Expand Down
5 changes: 2 additions & 3 deletions benchmarks/src/main/kotlin/org/ucfs/Java8ParserRecovery.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

package org.ucfs

import org.ucfs.JavaToken
import org.ucfs.descriptors.Descriptor
import org.ucfs.input.IInputGraph
import org.ucfs.input.ILabel
Expand Down Expand Up @@ -8649,8 +8648,8 @@ public class Java8ParserRecovery<VertexType, LabelType : ILabel> :
}
}

override fun parse(descriptor: Descriptor<VertexType>) {
super.parse(descriptor)
override fun handleDescriptor(descriptor: Descriptor<VertexType>) {
super.handleDescriptor(descriptor)
org.ucfs.intersection.RecoveryIntersection.handleRecoveryEdges(this, descriptor)
}

Expand Down
5 changes: 2 additions & 3 deletions benchmarks/src/test/kotlin/OfflineUcfsBenchmark.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import org.junit.jupiter.api.Disabled
import org.ucfs.input.LinearInputLabel
import org.ucfs.input.TerminalInputLabel
import java.io.File

fun main(args: Array<String>) {
Expand All @@ -19,7 +18,7 @@ class OfflineUcfsBenchmark : ParsingBenchmarks() {
}

override fun parse(text: String) {
val parser = org.ucfs.Java8Parser<Int, LinearInputLabel>()
val parser = org.ucfs.Java8Parser<Int, TerminalInputLabel>()
parser.setInput(getTokenStream(text))
parser.parse()
assert(parser.parse().first != null)
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/test/kotlin/RecoveryOfflineUcfsBenchmark.kt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import org.ucfs.input.LinearInputLabel
import org.ucfs.input.TerminalInputLabel
import kotlin.test.Ignore

@Ignore
class RecoveryOfflineUcfsBenchmark : ParsingBenchmarks() {
override fun getShortName(): String = "RecUcfsOff"
override fun parse(text: String) {
val parser = org.ucfs.Java8ParserRecovery<Int, LinearInputLabel>()
val parser = org.ucfs.Java8ParserRecovery<Int, TerminalInputLabel>()
parser.setInput(getTokenStream(text))
assert(parser.parse().first!= null){"can't build sppf"}
}
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/src/test/kotlin/SimpleUcfsCorrect.kt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import org.junit.jupiter.api.Test
import org.ucfs.input.IInputGraph
import org.ucfs.input.LinearInput
import org.ucfs.input.LinearInputLabel
import org.ucfs.input.TerminalInputLabel
import org.ucfs.parser.Gll
import org.ucfs.rsm.symbol.Term
import org.ucfs.rsm.writeRsmToDot
Expand All @@ -25,14 +25,14 @@ class SimpleUcfsCorrect {
}


fun getTokenStream(input: List<Term<String>>): IInputGraph<Int, LinearInputLabel> {
val inputGraph = LinearInput<Int, LinearInputLabel>()
fun getTokenStream(input: List<Term<String>>): IInputGraph<Int, TerminalInputLabel> {
val inputGraph = LinearInput<Int, TerminalInputLabel>()
var vertexId = 1

inputGraph.addVertex(vertexId)
inputGraph.addStartVertex(vertexId)
for (term in input) {
inputGraph.addEdge(vertexId, LinearInputLabel(term), ++vertexId)
inputGraph.addEdge(vertexId, TerminalInputLabel(term), ++vertexId)
}

return inputGraph
Expand Down
1 change: 1 addition & 0 deletions generator/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ repositories {
dependencies {
implementation(project(":solver"))
implementation("com.squareup:kotlinpoet:1.16.0")
testImplementation(kotlin("test"))
}

tasks.test {
Expand Down
76 changes: 0 additions & 76 deletions generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt
Original file line number Diff line number Diff line change
@@ -1,76 +0,0 @@
package org.ucfs.ast

import org.ucfs.GeneratorException
import org.ucfs.rsm.symbol.Nonterminal
import org.ucfs.sppf.node.*

/**
 * Converts an SPPF into a tree of AST [Node]s.
 *
 * Node classes for nonterminals are resolved reflectively by name inside [pkg]
 * and cached in [nonterminalToClass].
 */
class AstExtractor(val pkg: String) {
    /** Cache of already resolved node classes, keyed by nonterminal. */
    val nonterminalToClass = HashMap<Nonterminal, Class<*>>()

    /**
     * Packed nodes that were already expanded.
     * Needed to handle "many" in rules (many can make cycles in sppf).
     */
    val used = HashSet<PackedSppfNode<*>>()

    /**
     * Extracts an AST from [sppf].
     *
     * A temporary root node is created to collect the result; its first child is
     * returned, or the temporary root itself when nothing was attached to it.
     */
    fun extract(sppf: ISppfNode?): Node {
        val syntheticRoot = Node(null, 0)
        extract(sppf, syntheticRoot, null)
        return syntheticRoot.children.firstOrNull() ?: syntheticRoot
    }

    /**
     * Offset for a node placed after [left]: the end of [left] when there is one,
     * otherwise the offset of [parent].
     */
    private fun getOffset(left: Node?, parent: Node): Int =
        if (left != null) left.offset + left.length else parent.offset

    /**
     * Attaches the AST nodes for [sppf] to [parent], using [left] as the previous sibling.
     *
     * @return the rightmost node of the processed subtree, or `null` for a `null` input
     * @throws GeneratorException when [sppf] is of an unsupported node type
     */
    private fun extract(sppf: ISppfNode?, parent: Node, left: Node?): Node? = when (sppf) {
        null -> null

        is PackedSppfNode<*> -> {
            // The right part continues from whatever the left part produced last.
            val afterLeft = extract(sppf.leftSppfNode, parent, left)
            extract(sppf.rightSppfNode, parent, afterLeft)
        }

        is IntermediateSppfNode<*> -> extract(sppf.children.firstOrNull(), parent, left)

        is SymbolSppfNode<*> -> {
            val constructor = getNodeClass(sppf.symbol).getConstructor(Node::class.java, Int::class.java)
            val created = constructor.newInstance(parent, getOffset(left, parent)) as Node
            created.isRecovered = sppf.weight > 0
            created.left = left
            parent.children.add(created)

            // Take the first packed child that has not been expanded yet and mark it as used.
            val unvisited: PackedSppfNode<*> = sppf.children.first { it !in used }
            used.add(unvisited)

            extract(unvisited, created, null)
            parent.length += created.length
            created
        }

        is TerminalSppfNode<*> -> {
            val leaf = TerminalNode(parent, getOffset(left, parent), sppf.terminal, left)
            leaf.isRecovered = sppf.weight > 0
            parent.children.add(leaf)
            // The parent grows by the length of the terminal's string form.
            parent.length += sppf.terminal.toString().length
            leaf
        }

        else -> throw GeneratorException("Unknown sppf node type : $sppf")
    }

    /** Returns the node class for [nt], loading it by name from [pkg] on first use. */
    private fun getNodeClass(nt: Nonterminal): Class<*> =
        nonterminalToClass.getOrPut(nt) {
            Class.forName("$pkg.${NodeClassesGenerator.getClassName(nt)}")
        }
}
51 changes: 0 additions & 51 deletions generator/src/main/kotlin/org/ucfs/examples/Examples.kt
Original file line number Diff line number Diff line change
@@ -1,51 +0,0 @@
package org.ucfs.examples

import org.ucfs.ast.AstExtractor
import org.ucfs.ast.DotWriter
import org.ucfs.ast.NodeClassesGenerator
import org.ucfs.examples.dyck.DyckGrammar
import org.ucfs.examples.golang.SimpleGolang
import org.ucfs.grammar.combinator.Grammar
import org.ucfs.input.LinearInput
import org.ucfs.parser.Gll
import org.ucfs.rsm.writeRsmToDot
import org.ucfs.sppf.writeSppfToDot
import java.nio.file.Path


object Examples {
    /**
     * Runs the full example pipeline for [grammar] on [input]:
     * generates node classes into `generator/src/main/kotlin` under [pkg],
     * parses [input], writes the SPPF to `<name>.dot`, extracts the AST and
     * writes it twice through [DotWriter] (with the last flag `false`, then as
     * "[name] with siblings" with the flag `true`).
     *
     * Fails with a null-pointer exception when the parser yields no SPPF.
     */
    fun generateAst(grammar: Grammar, pkg: String, input: String, name: String) {
        val grammarClass = grammar::class.java
        NodeClassesGenerator(grammarClass).generate(Path.of("generator", "src", "main", "kotlin"), pkg)

        val parser = Gll.gll(grammar.rsm, LinearInput.buildFromString(input))
        val sppf = parser.parse().first!!
        writeSppfToDot(sppf, Path.of("${name}.dot").toString(), "${grammarClass.simpleName} SPPF for $input")

        val ast = AstExtractor(pkg).extract(sppf)
        val label = "${grammarClass.simpleName} AST for $input"
        val outputs = listOf(
            name to false,
            "$name with siblings" to true,
        )
        for ((fileName, flag) in outputs) {
            DotWriter().writeToFile(ast, fileName, label, flag)
        }
    }
}


/**
 * Writes the Dyck grammar RSM to `rsm.dot`, then generates example ASTs for a
 * few SimpleGolang and Dyck inputs.
 *
 * NOTE(review): several calls reuse the same output name ("simple golang", "3_dyck"),
 * so they target the same `<name>.dot` path - confirm whether that is intended.
 */
fun main() {
    writeRsmToDot(DyckGrammar().rsm, "rsm.dot")

    // A fresh grammar instance is created for every example, as before.
    fun golang(input: String, name: String) =
        Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", input, name)

    fun dyck(input: String, name: String) =
        Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", input, name)

    golang("r 1 + 1 ;", "simple golang")
    golang("r 1 + 1 ; 1 ; r 1 ;", "simple golang")

    dyck("[ ( ) ] ", "1_dyck")
    dyck("[ ( ) ] { }", "2_dyck")
    dyck("[ ] { } [ ( ) ]", "3_dyck")
    dyck(" [ { } ( ) ] ", "3_dyck")
    dyck("[ ] { { } ( ) } [ ( ) ]", "3_dyck")
}
Loading
Loading