Skip to content

Commit 8576f2c

Browse files
committed
feat: add *Grammar.IsLeftTerminating and fix *Grammar.IsValid panic on non-left terminating rule
1 parent bb1845e commit 8576f2c

File tree

7 files changed

+293
-23
lines changed

7 files changed

+293
-23
lines changed

dag.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ type Depgraph map[string]*node
1616
// It includes core rules only if necessary.
1717
func (g *Grammar) DependencyGraph() Depgraph {
1818
graph := Depgraph{}
19+
for _, corerule := range coreRules {
20+
graph[corerule.Name] = &node{
21+
Rulename: corerule.Name,
22+
Dependencies: getDependencies(corerule.Alternation),
23+
}
24+
}
1925
for _, rule := range g.Rulemap {
2026
graph[rule.Name] = &node{
2127
Rulename: rule.Name,
@@ -78,6 +84,139 @@ func (g *Grammar) IsDAG() bool {
7884
return true
7985
}
8086

87+
// RuleContainsCycle returns whether the rule contains a cycle or not.
88+
// It travels through the whole rule dependency graph, such that
89+
// it checks if the rule is cyclic AND if one of its dependency is too.
90+
//
91+
// WARNING: it is different than IsLeftTerminating, refer to its doc.
92+
func (g *Grammar) RuleContainsCycle(rulename string) (bool, error) {
93+
// Check the rule exists
94+
rule := GetRule(rulename, g.Rulemap)
95+
if rule == nil {
96+
return false, &ErrRuleNotFound{
97+
Rulename: rulename,
98+
}
99+
}
100+
101+
// Get all SCCs
102+
scc := &cycle{
103+
index: 0,
104+
stack: []*node{},
105+
dg: g.DependencyGraph(),
106+
}
107+
scc.find()
108+
109+
return ruleContainsCycle(scc.sccs, rulename), nil
110+
}
111+
112+
// IsLeftTerminating returns whether the rule is not left terminating.
113+
// It travels through the whole rule dependency graph, such that
114+
// it checks if the rule has a way to left terminate.
115+
//
116+
// Notice that it depends on the ordering your grammar, which could be
117+
// illustrated by the ABNF rule "element" that begins with the alternation
118+
// of a "rulename", which is terminating, and not by "option" or "group"
119+
// which are not.
120+
//
121+
// WARNING: it is different than RuleContainsCycle, refer to its doc.
122+
func (g *Grammar) IsLeftTerminating(rulename string) (bool, error) {
123+
// Check the rule exists
124+
rule := GetRule(rulename, g.Rulemap)
125+
if rule == nil {
126+
return false, &ErrRuleNotFound{
127+
Rulename: rulename,
128+
}
129+
}
130+
131+
// Stack has the same signature as a rulemap in order to use getRuleIn for simplicity
132+
stack := map[string]*Rule{
133+
rulename: rule,
134+
}
135+
return isAltLeftTerminating(g, stack, rule.Alternation), nil
136+
}
137+
138+
func isAltLeftTerminating(g *Grammar, stack map[string]*Rule, alt Alternation) bool {
139+
for _, con := range alt.Concatenations {
140+
for _, rep := range con.Repetitions {
141+
_, subIsOption := rep.Element.(ElemOption)
142+
if rep.Min == 0 || subIsOption {
143+
if !isElemLeftTerminating(g, stack, rep.Element) {
144+
return false
145+
}
146+
continue
147+
}
148+
if !isElemLeftTerminating(g, stack, rep.Element) {
149+
return false
150+
}
151+
break
152+
}
153+
}
154+
return true
155+
}
156+
157+
func isElemLeftTerminating(g *Grammar, stack map[string]*Rule, elem ElemItf) bool {
158+
switch v := elem.(type) {
159+
case ElemRulename:
160+
ruleInStack := (getRuleIn(v.Name, stack) != nil)
161+
if ruleInStack {
162+
return false
163+
}
164+
rule := GetRule(v.Name, g.Rulemap)
165+
return isAltLeftTerminating(g, stack, rule.Alternation)
166+
case ElemOption:
167+
return isAltLeftTerminating(g, stack, v.Alternation)
168+
case ElemGroup:
169+
return isAltLeftTerminating(g, stack, v.Alternation)
170+
case ElemCharVal:
171+
return len(v.Values) != 0
172+
case ElemProseVal:
173+
return len(v.values) != 0
174+
}
175+
return true
176+
}
177+
178+
func ruleContainsCycle(sccs [][]*node, rulename string) bool {
179+
// Find rulename's SCC
180+
scc := ([]*node)(nil)
181+
rulenode := (*node)(nil)
182+
for _, s := range sccs {
183+
if scc != nil {
184+
break
185+
}
186+
for _, ss := range s {
187+
if ss.Rulename == rulename {
188+
rulenode = ss
189+
scc = s
190+
break
191+
}
192+
}
193+
}
194+
195+
// Check if cyclic
196+
dependsOn := false
197+
for _, dep := range rulenode.Dependencies {
198+
if dep == rulename {
199+
dependsOn = true
200+
break
201+
}
202+
}
203+
if dependsOn || len(scc) != 1 {
204+
// If it depends on itself or is part of an SCC, then is cylic
205+
return true
206+
}
207+
208+
// Propagate to deps
209+
for _, dep := range rulenode.Dependencies {
210+
if dep == rulename {
211+
continue
212+
}
213+
if ruleContainsCycle(sccs, dep) {
214+
return true
215+
}
216+
}
217+
return false
218+
}
219+
81220
type cycle struct {
82221
index int
83222
stack []*node

dag_test.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,129 @@ func Test_U_IsDAG(t *testing.T) {
5050
})
5151
}
5252
}
53+
54+
func Test_U_RuleContainsCycle(t *testing.T) {
55+
t.Parallel()
56+
57+
var tests = map[string]struct {
58+
Grammar *goabnf.Grammar
59+
Rulename string
60+
ExpectedRes bool
61+
ExpectErr bool
62+
}{
63+
"abnf": {
64+
Grammar: goabnf.ABNF,
65+
Rulename: "rulelist",
66+
ExpectedRes: true,
67+
ExpectErr: false,
68+
},
69+
"a-self-left": {
70+
Grammar: mustGrammar("a = a \"a\"\r\n"),
71+
Rulename: "a",
72+
ExpectedRes: true,
73+
ExpectErr: false,
74+
},
75+
"a-self-right": {
76+
Grammar: mustGrammar("a = \"a\" a\r\n"),
77+
Rulename: "a",
78+
ExpectedRes: true,
79+
ExpectErr: false,
80+
},
81+
"ab": {
82+
Grammar: mustGrammar("a = b \"a\"\r\nb = a \"b\"\r\n"),
83+
Rulename: "a",
84+
ExpectedRes: true,
85+
ExpectErr: false,
86+
},
87+
}
88+
89+
for testname, tt := range tests {
90+
t.Run(testname, func(t *testing.T) {
91+
assert := assert.New(t)
92+
93+
res, err := tt.Grammar.RuleContainsCycle(tt.Rulename)
94+
if (err != nil) != tt.ExpectErr {
95+
t.Fatalf("Expected error: %t ; got %v", tt.ExpectErr, err)
96+
}
97+
assert.Equal(tt.ExpectedRes, res)
98+
})
99+
}
100+
}
101+
102+
func Test_U_IsLeftTerminating(t *testing.T) {
103+
t.Parallel()
104+
105+
var tests = map[string]struct {
106+
Grammar *goabnf.Grammar
107+
Rulename string
108+
ExpectedRes bool
109+
ExpectErr bool
110+
}{
111+
"abnf": {
112+
Grammar: goabnf.ABNF,
113+
Rulename: "rulelist",
114+
ExpectedRes: true,
115+
ExpectErr: false,
116+
},
117+
"a-left": {
118+
Grammar: mustGrammar("a = a \"a\"\r\n"),
119+
Rulename: "a",
120+
ExpectedRes: false,
121+
ExpectErr: false,
122+
},
123+
"a-right": {
124+
Grammar: mustGrammar("a = \"a\" a\r\n"),
125+
Rulename: "a",
126+
ExpectedRes: true,
127+
ExpectErr: false,
128+
},
129+
"ab-left": {
130+
Grammar: mustGrammar("a = b \"a\"\r\nb = a \"b\"\r\n"),
131+
Rulename: "a",
132+
ExpectedRes: false,
133+
ExpectErr: false,
134+
},
135+
"ab-right": {
136+
Grammar: mustGrammar("a = b \"a\"\r\nb = \"b\"\r\n"),
137+
Rulename: "a",
138+
ExpectedRes: true,
139+
ExpectErr: false,
140+
},
141+
"option-a-left": {
142+
Grammar: mustGrammar("a = [a] \"a\"\r\n"),
143+
Rulename: "a",
144+
ExpectedRes: false,
145+
ExpectErr: false,
146+
},
147+
"option-a-right": {
148+
Grammar: mustGrammar("a = [\"a\"] a\r\n"),
149+
Rulename: "a",
150+
ExpectedRes: false,
151+
ExpectErr: false,
152+
},
153+
"group-a-left": {
154+
Grammar: mustGrammar("a = (a) \"a\"\r\n"),
155+
Rulename: "a",
156+
ExpectedRes: false,
157+
ExpectErr: false,
158+
},
159+
"group-a-right": {
160+
Grammar: mustGrammar("a = (\"a\") a\r\n"),
161+
Rulename: "a",
162+
ExpectedRes: true,
163+
ExpectErr: false,
164+
},
165+
}
166+
167+
for testname, tt := range tests {
168+
t.Run(testname, func(t *testing.T) {
169+
assert := assert.New(t)
170+
171+
res, err := tt.Grammar.IsLeftTerminating(tt.Rulename)
172+
if (err != nil) != tt.ExpectErr {
173+
t.Fatalf("Expected error: %t ; got %v", tt.ExpectErr, err)
174+
}
175+
assert.Equal(tt.ExpectedRes, res)
176+
})
177+
}
178+
}

generate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func (opt thresholdOption) apply(opts *genOpts) {
142142
// checkCanGenerateSafely returns no error if the rule can be generated
143143
// safely i.e. if the rule can exist without infinite recursion.
144144
// Factually, it checks if all involved rules have no path v such that it
145-
// produces a cycle (v:rule-*->rulen) AND that this path is mandatory
145+
// produces a cycle (v:rule-*->rule) AND that this path is mandatory
146146
// (no option, no repetition with a minimum of zero).
147147
func checkCanGenerateSafely(g *Grammar, rulename string) error {
148148
rule := GetRule(rulename, g.Rulemap)

generate_fuzz_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ func FuzzGenerate(f *testing.F) {
1818
if len(out) != 0 {
1919
t.Fatal("output should be empty in case of an error")
2020
}
21-
} else {
22-
valid := goabnf.ABNF.IsValid(rulename, out)
23-
if !valid {
24-
t.Fatalf("generated output is invalid, out: %s", out)
25-
}
21+
return
22+
}
23+
24+
valid, err := goabnf.ABNF.IsValid(rulename, out)
25+
if err != nil || !valid {
26+
t.Fatalf("generated output is invalid, out: %s", out)
2627
}
2728
})
2829
}

grammar.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,16 @@ type Grammar struct {
1515
// IsValid checks there exist at least a path that completly consumes
1616
// input, hence is valide given this gramma and especially one of its
1717
// rule.
18-
//
19-
// XXX panic if involves unvaoidable recursive rule.
20-
// - checkCanGenerateSafely is not sufficient as parseAlt will recurse without limitation
21-
// (checks there are none unavoidable path on its road, not that it won't fall/recurse into it)
22-
// - deepness is not sufficient as recursing could be very difficult to predict
23-
func (g *Grammar) IsValid(rulename string, input []byte, opts ...ParseOption) bool {
18+
func (g *Grammar) IsValid(rulename string, input []byte, opts ...ParseOption) (bool, error) {
19+
lt, err := g.IsLeftTerminating(rulename)
20+
if err != nil {
21+
return false, err
22+
}
23+
if !lt {
24+
return false, fmt.Errorf("rule %s is not left terminating thus can't be validated without the risk of infinite recursion", rulename)
25+
}
2426
paths, err := Parse(input, g, rulename, opts...)
25-
return len(paths) != 0 && err == nil
27+
return len(paths) != 0 && err == nil, nil
2628
}
2729

2830
// String returns the representation of the grammar that is valid

grammar_fuzz_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,19 @@ func FuzzParseABNF(f *testing.F) {
1212
}
1313

1414
f.Fuzz(func(t *testing.T, input []byte) {
15-
path, err := goabnf.ParseABNF(input)
15+
grammar, err := goabnf.ParseABNF(input)
1616

1717
if err != nil {
18-
if path != nil {
18+
if grammar != nil {
1919
t.Fatal("Expected no path when error")
2020
}
2121
if err, ok := err.(*goabnf.ErrMultipleSolutionsFound); ok {
2222
t.Fatalf("For input %s, got error %s", input, err)
2323
}
24-
} else {
25-
if path == nil {
26-
t.Fatal("Expected a path when no error")
27-
}
24+
return
25+
}
26+
if grammar == nil {
27+
t.Fatal("Expected a grammar when no error")
2828
}
2929
})
3030
}

utils.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,13 @@ func pow(v, e int) int {
151151
// it is a core rule or present in the grammar, or nil if not found.
152152
// It validates the RFC 5234 Section 2.1 "rule names are case insensitive".
153153
func GetRule(rulename string, rulemap map[string]*Rule) *Rule {
154-
for _, coreRule := range coreRules {
155-
if strings.EqualFold(rulename, coreRule.Name) {
156-
return coreRule
157-
}
154+
if rule := getRuleIn(rulename, coreRules); rule != nil {
155+
return rule
158156
}
157+
return getRuleIn(rulename, rulemap)
158+
}
159+
160+
func getRuleIn(rulename string, rulemap map[string]*Rule) *Rule {
159161
for _, rule := range rulemap {
160162
if strings.EqualFold(rulename, rule.Name) {
161163
return rule

0 commit comments

Comments
 (0)