Skip to content

Add fictive start nonterminal #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions solver/src/main/kotlin/org/ucfs/grammar/combinator/Grammar.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ open class Grammar {
val nonTerms = ArrayList<Nt>()

private lateinit var startNt: Nt
private lateinit var fictitiousStartNt: Nt

private var _rsm: RsmState? = null
val rsm: RsmState
Expand All @@ -22,12 +23,6 @@ open class Grammar {
return _rsm!!
}

/**
 * Registers [expr] as the start nonterminal of this grammar.
 *
 * @param expr expression to use as the start symbol; must be an [Nt]
 * @throws IllegalArgumentException if [expr] is not an [Nt]
 */
fun setStart(expr: Regexp) {
    // `require` smart-casts `expr` to Nt for the assignment below.
    require(expr is Nt) { "Only NT object can be start state for Grammar" }
    startNt = expr
}

fun Nt.asStart(): Nt {
if (this@Grammar::startNt.isInitialized) {
throw Exception("Nonterminal ${nonterm.name} is already initialized")
Expand All @@ -43,6 +38,8 @@ open class Grammar {
/**
 * Builds the RSM box of every registered nonterminal, then wraps the declared
 * start nonterminal into an extra nonterminal named "fictiveStart".
 *
 * @return the start state of the fictitious start nonterminal
 */
private fun buildRsm(): RsmState {
    nonTerms.forEach { it.buildRsmBox() }
    //if nonterminal not initialized -- it will be checked in buildRsmBox()
    // The fictitious nonterminal has the single rule `fictiveStart -> startNt`;
    // its box is built separately from the boxes in nonTerms above.
    fictitiousStartNt = Nt(startNt, "fictiveStart")
    fictitiousStartNt.buildRsmBox()
    return fictitiousStartNt.nonterm.startState
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ open class Nt() : DerivedSymbol {
/** Creates a nonterminal and stores [lhs] as its `rsmDescription`. */
constructor(lhs: Regexp) : this() {
    rsmDescription = lhs
}

/**
 * Creates a nonterminal described by [lhs] and immediately assigns it a
 * [Nonterminal] called [name].
 */
constructor(lhs: Regexp, name: String) : this(lhs) {
    nonterm = Nonterminal(name)
}


lateinit var nonterm: Nonterminal
private set
Expand Down
19 changes: 14 additions & 5 deletions solver/src/main/kotlin/org/ucfs/parser/Gll.kt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
/**
 * Creates a [Gll] instance that runs with [IntersectionEngine] over [inputGraph].
 *
 * The destination of the first outgoing edge of [startState] is passed to the
 * context as the final state, so [startState] must have at least one outgoing edge.
 *
 * @param startState start state handed to the context as its fictive start state
 * @param inputGraph input graph to parse
 */
fun <VertexType, LabelType : ILabel> gll(
    startState: RsmState, inputGraph: IInputGraph<VertexType, LabelType>
): Gll<VertexType, LabelType> {
    val finalState = startState.outgoingEdges.get(0).destinationState
    return Gll(Context(startState, finalState, inputGraph), IntersectionEngine)
}
}

Expand Down Expand Up @@ -66,6 +67,11 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
ctx.descriptors.add(newDescriptor)
}

/**
 * Tells whether [matchedRange] is a complete parse result: its input range starts in
 * one of the input start vertices and its RSM range spans from the context's fictive
 * start state to its fictive final state.
 *
 * @param descriptor descriptor being processed (currently not consulted)
 * @param matchedRange range node to check; its input range must be non-null, and its
 * RSM range must be non-null whenever the input-range check passes
 */
fun isParseResult(descriptor: Descriptor<VertexType>, matchedRange: RangeSppfNode<VertexType>): Boolean {
    val inputFrom = matchedRange.inputRange!!.from
    if (inputFrom !in ctx.input.getInputStartVertices()) {
        return false
    }
    // The RSM range is only dereferenced once the input-range check has passed.
    val rsmRange = matchedRange.rsmRange!!
    return rsmRange.from == ctx.fictiveStartState && rsmRange.to == ctx.fictiveFinalState
}
/**
* Processes descriptor
* @param descriptor - descriptor to process
Expand All @@ -77,7 +83,7 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
val node = getEpsilonRange(descriptor)
//TODO fix
// dirty hack: in fact these are equivalent descriptors
// but only initial was added in handlet set
// but only initial was added in handled set
ctx.descriptors.addToHandled(Descriptor(descriptor.inputPosition,
descriptor.gssNode, descriptor.rsmState, node))
node
Expand All @@ -87,11 +93,14 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
for (poppedEdge in ctx.gss.pop(descriptor, matchedRange)) {
handlePoppedGssEdge(poppedEdge, descriptor, matchedRange)
}
if (descriptor.gssNode.outgoingEdges.isEmpty() && descriptor.gssNode.rsm.isStart) {
ctx.parseResult = matchedRange
if (isParseResult(descriptor, matchedRange)) {

if(ctx.parseResult == null) {
ctx.parseResult = matchedRange
}
ctx.parseResults.add(matchedRange)
}
}

engine.handleEdges(this, descriptor)
}
}
Expand Down
8 changes: 5 additions & 3 deletions solver/src/main/kotlin/org/ucfs/parser/IGll.kt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ interface IGll<InputNodeType, LabelType : ILabel> {
curDescriptor = ctx.descriptors.nextToHandle()
}

return ctx.parseResult
// assert(ctx.parseResult != null)
// assert(ctx.parseResult!!.children.size == 1)
return ctx.parseResult!!.children.get(0)
}

/**
Expand All @@ -52,9 +54,9 @@ interface IGll<InputNodeType, LabelType : ILabel> {
fun initDescriptors(input: IInputGraph<InputNodeType, LabelType>) {
for (startVertex in input.getInputStartVertices()) {

val gssNode = ctx.gss.getOrCreateNode(startVertex, ctx.startState)
val gssNode = ctx.gss.getOrCreateNode(startVertex, ctx.fictiveStartState)
val startDescriptor = Descriptor(
startVertex, gssNode, ctx.startState, getEmptyRange()
startVertex, gssNode, ctx.fictiveStartState, getEmptyRange(true)
)
ctx.descriptors.add(startDescriptor)
}
Expand Down
5 changes: 3 additions & 2 deletions solver/src/main/kotlin/org/ucfs/parser/context/Context.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.ucfs.parser.context

import org.ucfs.descriptors.Descriptor
import org.ucfs.descriptors.DescriptorsStorage
import org.ucfs.gss.GraphStructuredStack
import org.ucfs.input.IInputGraph
Expand All @@ -18,7 +17,8 @@ class Context<InputNodeType, LabelType : ILabel> (
/**
* Starting state of accepting Nonterminal in RSM
*/
val startState: RsmState,
val fictiveStartState: RsmState,
val fictiveFinalState: RsmState,
val input: IInputGraph<InputNodeType, LabelType>


Expand All @@ -37,4 +37,5 @@ class Context<InputNodeType, LabelType : ILabel> (
val gss: GraphStructuredStack<InputNodeType> = GraphStructuredStack()

var parseResult: RangeSppfNode<InputNodeType>? = null
var parseResults = ArrayList<RangeSppfNode<InputNodeType>>()
}
2 changes: 1 addition & 1 deletion solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ fun writeRsmToTxt(startState: RsmState, pathToTXT: String) {
}

fun writeRsmToDot(startState: RsmState, filePath: String) {
val states = getAllStates(startState)
val states = getAllStates((startState.outgoingEdges.get(0).symbol as Nonterminal).startState)
val boxes: HashMap<Nonterminal, HashSet<RsmState>> = HashMap()

for (state in states) {
Expand Down
22 changes: 20 additions & 2 deletions solver/src/main/kotlin/org/ucfs/sppf/node/RangeSppfNode.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ data class RangeSppfNode<VertexType>(
val children = ArrayList<RangeSppfNode<VertexType>>()
}

fun <VertexType> getEmptyRange(): RangeSppfNode<VertexType> = RangeSppfNode(null, null, EmptyType())
/**
 * Creates a range node with no input range and no RSM range, typed as [EmptyType].
 *
 * @param isStart value stored in the `isStart` flag of the created [EmptyType]
 */
fun <VertexType> getEmptyRange(isStart: Boolean = false): RangeSppfNode<VertexType> =
    RangeSppfNode(null, null, EmptyType().also { it.isStart = isStart })

data class InputRange<VertexType>(
val from: VertexType,
Expand All @@ -45,4 +51,16 @@ data class TerminalType<T : ITerminal>(val terminal: T) : RangeType
data class NonterminalType(val startState: RsmState) : RangeType
data class EpsilonNonterminalType(val startState: RsmState) : RangeType
data class IntermediateType<VertexType>(val grammarSlot: RsmState, val inputPosition: VertexType) : RangeType
data class EmptyType(private val id: Int = lastEmptyId++) : RangeType
/**
 * Range type of an empty SPPF range.
 *
 * Every instance compares equal to every other [EmptyType] and they all share one
 * hash code; [isStart] does not participate in equality.
 */
class EmptyType : RangeType {
    /** Start flag; `false` unless set explicitly. */
    var isStart = false

    override fun equals(other: Any?): Boolean = other is EmptyType

    // Constant hash keeps hashCode consistent with the type-only equals above.
    override fun hashCode(): Int = 12
}
2 changes: 1 addition & 1 deletion solver/src/main/kotlin/org/ucfs/sppf/writeSppfToDot.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fun <InputNode> getSppfDot(sppfNode: RangeSppfNode<InputNode>, label: String = "
node = queue.removeFirst()
if (!visited.add(node.hashCode())) continue

nodeViews[node] = getNodeView(node, )
nodeViews[node] = getNodeView(node,)// node.id.toString())

node.children.forEach {
queue.addLast(it)
Expand Down
35 changes: 33 additions & 2 deletions test-shared/src/test/kotlin/grammars/SimpleDyck.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package grammars

import org.ucfs.grammar.combinator.Grammar
import org.ucfs.grammar.combinator.extension.StringExtension.or
import org.ucfs.grammar.combinator.extension.StringExtension.times
import org.ucfs.grammar.combinator.regexp.*
import org.ucfs.grammar.combinator.regexp.Epsilon
Expand All @@ -11,6 +12,7 @@ class SimplifiedDyck : Grammar() {

init {
S /= Option("(" * S * ")")
// S = eps | ( S )
}
}

Expand All @@ -19,13 +21,14 @@ class LoopDyck : Grammar() {

init {
S /= Many( "(" * S * ")")
// S = [ ( S ) ]*
}
}

/**
 * Grammar with two derivations of the single terminal "a":
 * through A directly, or through B and C.
 */
class ABGrammar : Grammar() {
    val A by Nt(Term("a")) // A -> a
    val C by Nt(Term("a")) // C -> a
    val B by Nt(C) // B -> C
    val S by Nt(A or B).asStart() // S -> A | B
}

Expand All @@ -36,4 +39,32 @@ class SALang : Grammar() {

/**
 * Grammar whose start nonterminal S is described by `Epsilon` alone.
 */
class Epsilon : Grammar() {
val S by Nt(Epsilon).asStart()
}

/**
* Grammar with the single self-referencing rule S = a | S.
* Can parse only one symbol 'a'
*/
class AmbiguousAStar1: Grammar() {
val S by Nt().asStart()

init {
S /= "a" or S
// S = a | S
}
}

/**
 * Grammar with the single rule S = a | S S.
 */
class AmbiguousAStar2: Grammar() {
val S by Nt().asStart()

init {
S /= "a" or S * S
// S = a | S S  (`*` binds tighter than the infix `or`)
}
}


/**
 * Grammar with the single rule S = a | S S.
 *
 * NOTE(review): the body is identical to AmbiguousAStar2 — confirm whether a
 * different rule was intended here.
 */
class AmbiguousAStar3: Grammar() {
val S by Nt().asStart()

init {
S /= "a" or S * S
// S = a | S S  (`*` binds tighter than the infix `or`)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ abstract class AbstractCorrectnessTest {

abstract fun getRootDataFolder(): Path

val grammars = listOf(SimplifiedDyck(), ABGrammar(), SALang(), Epsilon(), LoopDyck())
val grammars = listOf(SimplifiedDyck(), ABGrammar(), SALang(), Epsilon(), LoopDyck(), AmbiguousAStar2(), AmbiguousAStar1())
//TODO return only one result for ambiguous AmbiguousAStar2(), AmbiguousAStar1()
// TODO fix worst case for loopdyck
val regenerate = false
//@TestFactory
//TODO make it abstract by used grammar
Expand Down
2 changes: 1 addition & 1 deletion test-shared/src/test/kotlin/solver/TreeCorrectnessTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class TreeCorrectnessTest : AbstractCorrectnessTest() {
if (expectedResult.isEmpty() || regenerate) {
expectedFile.writeText(actualResult)
} else {
assertEquals(expectedResult, actualResult)
assertEquals(expectedResult, actualResult, "for grammar ${grammar.javaClass.simpleName} at ${testCasesFolder.name}")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@ label=""
0 [label = "Nonterminal A, input: [0, 1]", shape = invtrapezium]
1 [label = "Nonterminal B, input: [0, 1]", shape = invtrapezium]
2 [label = "Nonterminal C, input: [0, 1]", shape = invtrapezium]
3 [label = "Range , input: [0, 1], rsm: [A_0, A_1]", shape = ellipse]
4 [label = "Range , input: [0, 1], rsm: [B_0, B_1]", shape = ellipse]
5 [label = "Range , input: [0, 1], rsm: [C_0, C_1]", shape = ellipse]
6 [label = "Range , input: [0, 1], rsm: [S_0, S_1]", shape = ellipse]
7 [label = "Terminal 'a', input: [0, 1]", shape = rectangle]
0->3
1->4
2->5
3 [label = "Nonterminal S, input: [0, 1]", shape = invtrapezium]
4 [label = "Range , input: [0, 1], rsm: [A_0, A_1]", shape = ellipse]
5 [label = "Range , input: [0, 1], rsm: [B_0, B_1]", shape = ellipse]
6 [label = "Range , input: [0, 1], rsm: [C_0, C_1]", shape = ellipse]
7 [label = "Range , input: [0, 1], rsm: [S_0, S_1]", shape = ellipse]
8 [label = "Terminal 'a', input: [0, 1]", shape = rectangle]
0->4
1->5
2->6
3->7
4->2
5->7
6->1
6->0
4->8
5->2
6->8
7->1
7->0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
digraph Input {
start -> 0;
0 -> 1 [label = "a"];
1 -> 2 [label = "a"];
2 -> 3 [label = "a"];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
digraph g {
labelloc="t"
label=""
0 [label = "Nonterminal S, input: [0, 1]", shape = invtrapezium]
1 [label = "Nonterminal S, input: [0, 1]", shape = invtrapezium]
2 [label = "Range , input: [0, 1], rsm: [S_0, S_1]", shape = ellipse]
3 [label = "Terminal 'a', input: [0, 1]", shape = rectangle]
0->2
2->3
2->0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
digraph Input {
start -> 0;
0 -> 1 [label = "a"];
1 -> 2 [label = "a"];
2 -> 3 [label = "a"];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
digraph g {
labelloc="t"
label=""
0 [label = "Intermediate input: 1, rsm: S_2, input: [0, 2]", shape = plain]
1 [label = "Intermediate input: 1, rsm: S_2, input: [0, 3]", shape = plain]
2 [label = "Intermediate input: 2, rsm: S_2, input: [0, 3]", shape = plain]
3 [label = "Intermediate input: 2, rsm: S_2, input: [1, 3]", shape = plain]
4 [label = "Nonterminal S, input: [0, 1]", shape = invtrapezium]
5 [label = "Nonterminal S, input: [0, 2]", shape = invtrapezium]
6 [label = "Nonterminal S, input: [0, 3]", shape = invtrapezium]
7 [label = "Nonterminal S, input: [1, 2]", shape = invtrapezium]
8 [label = "Nonterminal S, input: [1, 2]", shape = invtrapezium]
9 [label = "Nonterminal S, input: [1, 3]", shape = invtrapezium]
10 [label = "Nonterminal S, input: [2, 3]", shape = invtrapezium]
11 [label = "Range , input: [0, 1], rsm: [S_0, S_1]", shape = ellipse]
12 [label = "Range , input: [0, 1], rsm: [S_0, S_2]", shape = ellipse]
13 [label = "Range , input: [0, 2], rsm: [S_0, S_1]", shape = ellipse]
14 [label = "Range , input: [0, 2], rsm: [S_0, S_2]", shape = ellipse]
15 [label = "Range , input: [0, 3], rsm: [S_0, S_1]", shape = ellipse]
16 [label = "Range , input: [1, 2], rsm: [S_0, S_1]", shape = ellipse]
17 [label = "Range , input: [1, 2], rsm: [S_0, S_2]", shape = ellipse]
18 [label = "Range , input: [1, 2], rsm: [S_2, S_1]", shape = ellipse]
19 [label = "Range , input: [1, 3], rsm: [S_0, S_1]", shape = ellipse]
20 [label = "Range , input: [1, 3], rsm: [S_2, S_1]", shape = ellipse]
21 [label = "Range , input: [2, 3], rsm: [S_0, S_1]", shape = ellipse]
22 [label = "Range , input: [2, 3], rsm: [S_2, S_1]", shape = ellipse]
23 [label = "Terminal 'a', input: [0, 1]", shape = rectangle]
24 [label = "Terminal 'a', input: [1, 2]", shape = rectangle]
25 [label = "Terminal 'a', input: [2, 3]", shape = rectangle]
0->12
0->18
1->12
1->20
2->14
2->22
3->17
3->22
4->11
5->13
6->15
7->16
9->19
10->21
11->23
12->4
13->0
14->5
15->1
15->2
16->24
17->7
18->7
19->3
20->9
21->25
22->10
}
Loading
Loading