Skip to content

Commit 5bf1a5a

Browse files
authored
Add fictive start nonterminal (#37)
* Add API for any dot input with strings on edges
1 parent 3c6725b commit 5bf1a5a

File tree

26 files changed

+467
-258
lines changed

26 files changed

+467
-258
lines changed

solver/src/main/kotlin/org/ucfs/grammar/combinator/Grammar.kt

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ open class Grammar {
1212
val nonTerms = ArrayList<Nt>()
1313

1414
private lateinit var startNt: Nt
15+
private lateinit var fictitiousStartNt: Nt
1516

1617
private var _rsm: RsmState? = null
1718
val rsm: RsmState
@@ -22,12 +23,6 @@ open class Grammar {
2223
return _rsm!!
2324
}
2425

25-
fun setStart(expr: Regexp) {
26-
if (expr is Nt) {
27-
startNt = expr
28-
} else throw IllegalArgumentException("Only NT object can be start state for Grammar")
29-
}
30-
3126
fun Nt.asStart(): Nt {
3227
if (this@Grammar::startNt.isInitialized) {
3328
throw Exception("Nonterminal ${nonterm.name} is already initialized")
@@ -43,6 +38,8 @@ open class Grammar {
4338
private fun buildRsm(): RsmState {
4439
nonTerms.forEach { it.buildRsmBox() }
4540
//if nonterminal not initialized -- it will be checked in buildRsmBox()
46-
return startNt.nonterm.startState
41+
fictitiousStartNt = Nt(startNt, "fictiveStart")
42+
fictitiousStartNt.buildRsmBox()
43+
return fictitiousStartNt.nonterm.startState
4744
}
4845
}

solver/src/main/kotlin/org/ucfs/grammar/combinator/regexp/Nt.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ open class Nt() : DerivedSymbol {
1010
constructor(lhs: Regexp) : this() {
1111
rsmDescription = lhs
1212
}
13+
constructor(lhs: Regexp, name: String) : this() {
14+
rsmDescription = lhs
15+
this.nonterm = Nonterminal(name)
16+
}
17+
1318

1419
lateinit var nonterm: Nonterminal
1520
private set

solver/src/main/kotlin/org/ucfs/parser/Gll.kt

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
2929
fun <VertexType, LabelType : ILabel> gll(
3030
startState: RsmState, inputGraph: IInputGraph<VertexType, LabelType>
3131
): Gll<VertexType, LabelType> {
32-
return Gll(Context(startState, inputGraph), IntersectionEngine)
32+
val finalState = startState.outgoingEdges.get(0).destinationState
33+
return Gll(Context(startState, finalState, inputGraph), IntersectionEngine)
3334
}
3435
}
3536

@@ -66,6 +67,11 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
6667
ctx.descriptors.add(newDescriptor)
6768
}
6869

70+
fun isParseResult(descriptor: Descriptor<VertexType>, matchedRange: RangeSppfNode<VertexType>): Boolean {
71+
return matchedRange.inputRange!!.from in ctx.input.getInputStartVertices()
72+
&& matchedRange.rsmRange!!.from == ctx.fictiveStartState
73+
&& matchedRange.rsmRange.to == ctx.fictiveFinalState
74+
}
6975
/**
7076
* Processes descriptor
7177
* @param descriptor - descriptor to process
@@ -77,7 +83,7 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
7783
val node = getEpsilonRange(descriptor)
7884
//TODO fix
7985
// dirty hack: in fact it's equivalent descriptors
80-
// but only initial was added in handlet set
86+
// but only initial was added in handled set
8187
ctx.descriptors.addToHandled(Descriptor(descriptor.inputPosition,
8288
descriptor.gssNode, descriptor.rsmState, node))
8389
node
@@ -87,11 +93,14 @@ class Gll<VertexType, LabelType : ILabel> private constructor(
8793
for (poppedEdge in ctx.gss.pop(descriptor, matchedRange)) {
8894
handlePoppedGssEdge(poppedEdge, descriptor, matchedRange)
8995
}
90-
if (descriptor.gssNode.outgoingEdges.isEmpty() && descriptor.gssNode.rsm.isStart) {
91-
ctx.parseResult = matchedRange
96+
if (isParseResult(descriptor, matchedRange)) {
97+
98+
if(ctx.parseResult == null) {
99+
ctx.parseResult = matchedRange
100+
}
101+
ctx.parseResults.add(matchedRange)
92102
}
93103
}
94-
95104
engine.handleEdges(this, descriptor)
96105
}
97106
}

solver/src/main/kotlin/org/ucfs/parser/IGll.kt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ interface IGll<InputNodeType, LabelType : ILabel> {
3636
curDescriptor = ctx.descriptors.nextToHandle()
3737
}
3838

39-
return ctx.parseResult
39+
// assert(ctx.parseResult != null)
40+
// assert(ctx.parseResult!!.children.size == 1)
41+
return ctx.parseResult!!.children.get(0)
4042
}
4143

4244
/**
@@ -52,9 +54,9 @@ interface IGll<InputNodeType, LabelType : ILabel> {
5254
fun initDescriptors(input: IInputGraph<InputNodeType, LabelType>) {
5355
for (startVertex in input.getInputStartVertices()) {
5456

55-
val gssNode = ctx.gss.getOrCreateNode(startVertex, ctx.startState)
57+
val gssNode = ctx.gss.getOrCreateNode(startVertex, ctx.fictiveStartState)
5658
val startDescriptor = Descriptor(
57-
startVertex, gssNode, ctx.startState, getEmptyRange()
59+
startVertex, gssNode, ctx.fictiveStartState, getEmptyRange(true)
5860
)
5961
ctx.descriptors.add(startDescriptor)
6062
}

solver/src/main/kotlin/org/ucfs/parser/context/Context.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.ucfs.parser.context
22

3-
import org.ucfs.descriptors.Descriptor
43
import org.ucfs.descriptors.DescriptorsStorage
54
import org.ucfs.gss.GraphStructuredStack
65
import org.ucfs.input.IInputGraph
@@ -18,7 +17,8 @@ class Context<InputNodeType, LabelType : ILabel> (
1817
/**
1918
* Starting state of accepting Nonterminal in RSM
2019
*/
21-
val startState: RsmState,
20+
val fictiveStartState: RsmState,
21+
val fictiveFinalState: RsmState,
2222
val input: IInputGraph<InputNodeType, LabelType>
2323

2424

@@ -37,4 +37,5 @@ class Context<InputNodeType, LabelType : ILabel> (
3737
val gss: GraphStructuredStack<InputNodeType> = GraphStructuredStack()
3838

3939
var parseResult: RangeSppfNode<InputNodeType>? = null
40+
var parseResults = ArrayList<RangeSppfNode<InputNodeType>>()
4041
}

solver/src/main/kotlin/org/ucfs/rsm/RsmWrite.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ fun writeRsmToTxt(startState: RsmState, pathToTXT: String) {
8787
}
8888

8989
fun writeRsmToDot(startState: RsmState, filePath: String) {
90-
val states = getAllStates(startState)
90+
val states = getAllStates((startState.outgoingEdges.get(0).symbol as Nonterminal).startState)
9191
val boxes: HashMap<Nonterminal, HashSet<RsmState>> = HashMap()
9292

9393
for (state in states) {

solver/src/main/kotlin/org/ucfs/sppf/node/RangeSppfNode.kt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@ data class RangeSppfNode<VertexType>(
2626
val children = ArrayList<RangeSppfNode<VertexType>>()
2727
}
2828

29-
fun <VertexType> getEmptyRange(): RangeSppfNode<VertexType> = RangeSppfNode(null, null, EmptyType())
29+
fun <VertexType> getEmptyRange(isStart: Boolean = false): RangeSppfNode<VertexType> {
30+
val type = EmptyType()
31+
if(isStart) {
32+
type.isStart = isStart
33+
}
34+
return RangeSppfNode(null, null, type)
35+
}
3036

3137
data class InputRange<VertexType>(
3238
val from: VertexType,
@@ -45,4 +51,16 @@ data class TerminalType<T : ITerminal>(val terminal: T) : RangeType
4551
data class NonterminalType(val startState: RsmState) : RangeType
4652
data class EpsilonNonterminalType(val startState: RsmState) : RangeType
4753
data class IntermediateType<VertexType>(val grammarSlot: RsmState, val inputPosition: VertexType) : RangeType
48-
data class EmptyType(private val id: Int = lastEmptyId++) : RangeType
54+
class EmptyType : RangeType {
55+
var isStart = false
56+
57+
@Override
58+
override fun equals(other: Any?): Boolean {
59+
return other is EmptyType
60+
}
61+
62+
@Override
63+
override fun hashCode(): Int {
64+
return 12
65+
}
66+
}

solver/src/main/kotlin/org/ucfs/sppf/writeSppfToDot.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fun <InputNode> getSppfDot(sppfNode: RangeSppfNode<InputNode>, label: String = "
2727
node = queue.removeFirst()
2828
if (!visited.add(node.hashCode())) continue
2929

30-
nodeViews[node] = getNodeView(node, )
30+
nodeViews[node] = getNodeView(node,)// node.id.toString())
3131

3232
node.children.forEach {
3333
queue.addLast(it)

test-shared/src/test/kotlin/grammars/SimpleDyck.kt

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package grammars
22

33
import org.ucfs.grammar.combinator.Grammar
4+
import org.ucfs.grammar.combinator.extension.StringExtension.or
45
import org.ucfs.grammar.combinator.extension.StringExtension.times
56
import org.ucfs.grammar.combinator.regexp.*
67
import org.ucfs.grammar.combinator.regexp.Epsilon
@@ -11,6 +12,7 @@ class SimplifiedDyck : Grammar() {
1112

1213
init {
1314
S /= Option("(" * S * ")")
15+
// S = eps | ( S )
1416
}
1517
}
1618

@@ -19,13 +21,14 @@ class LoopDyck : Grammar() {
1921

2022
init {
2123
S /= Many( "(" * S * ")")
24+
// S = [ ( S ) ]*
2225
}
2326
}
2427

2528
class ABGrammar : Grammar() {
26-
val A by Nt(Term("a"))
29+
val A by Nt(Term("a")) // A -> a
2730
val C by Nt(Term("a"))
28-
val B by Nt(C)
31+
val B by Nt(C) // B -> C
2932
val S by Nt(A or B).asStart()
3033
}
3134

@@ -36,4 +39,32 @@ class SALang : Grammar() {
3639

3740
class Epsilon : Grammar() {
3841
val S by Nt(Epsilon).asStart()
42+
}
43+
44+
/**
45+
* Can parse only one symbol 'a'
46+
*/
47+
class AmbiguousAStar1: Grammar() {
48+
val S by Nt().asStart()
49+
50+
init {
51+
S /= "a" or S
52+
}
53+
}
54+
55+
class AmbiguousAStar2: Grammar() {
56+
val S by Nt().asStart()
57+
58+
init {
59+
S /= "a" or S * S
60+
}
61+
}
62+
63+
64+
class AmbiguousAStar3: Grammar() {
65+
val S by Nt().asStart()
66+
67+
init {
68+
S /= "a" or S * S
69+
}
3970
}

test-shared/src/test/kotlin/solver/AbstractCorrectnessTest.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ abstract class AbstractCorrectnessTest {
1313

1414
abstract fun getRootDataFolder(): Path
1515

16-
val grammars = listOf(SimplifiedDyck(), ABGrammar(), SALang(), Epsilon(), LoopDyck())
16+
val grammars = listOf(SimplifiedDyck(), ABGrammar(), SALang(), Epsilon(), LoopDyck(), AmbiguousAStar2(), AmbiguousAStar1())
17+
//TODO return only one result for ambiguous AmbiguousAStar2(), AmbiguousAStar1()
18+
// TODO fix worst case for loopdyck
1719
val regenerate = false
1820
//@TestFactory
1921
//TODO make it abstract by used grammar

0 commit comments

Comments
 (0)