From d2d7a4917ad2cebecb476c31079094192c2aa9a1 Mon Sep 17 00:00:00 2001 From: Yusuke Asai Date: Mon, 11 Aug 2025 12:01:40 +0900 Subject: [PATCH 1/3] Remove unnecessary branches in generated parser; improve coverage --- peg.peg.go | 108 ++++++++++------------------------------ peg_test.go | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tree/peg.go | 61 +++++++++++++++++++++-- 3 files changed, 224 insertions(+), 84 deletions(-) diff --git a/peg.peg.go b/peg.peg.go index 72d5e25..beb37aa 100644 --- a/peg.peg.go +++ b/peg.peg.go @@ -864,9 +864,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l22 } position++ - if !_rules[ruleSpacing]() { - goto l22 - } + _rules[ruleSpacing]() { position24, tokenIndex24 := position, tokenIndex { @@ -875,9 +873,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l25 } position++ - if !_rules[ruleSpacing]() { - goto l25 - } + _rules[ruleSpacing]() l27: { position28, tokenIndex28 := position, tokenIndex @@ -888,16 +884,12 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l28 } position++ - if !_rules[ruleSpacing]() { - goto l28 - } + _rules[ruleSpacing]() goto l27 l28: position, tokenIndex = position28, tokenIndex28 } - if !_rules[ruleSpacing]() { - goto l25 - } + _rules[ruleSpacing]() if buffer[position] != ')' { goto l25 } @@ -916,9 +908,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { } } l24: - if !_rules[ruleSpacing]() { - goto l22 - } + _rules[ruleSpacing]() add(ruleImport, position23) } goto l21 @@ -962,9 +952,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l0 } position++ - if !_rules[ruleSpacing]() { - goto l0 - } + _rules[ruleSpacing]() if !_rules[ruleAction]() { goto l0 } @@ -982,9 +970,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { if !_rules[ruleLeftArrow]() { goto l0 } - if !_rules[ruleExpression]() { - goto l0 - } + _rules[ruleExpression]() { add(ruleAction5, position) } @@ -1030,9 +1016,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { if !_rules[ruleLeftArrow]() { goto l33 } - if !_rules[ruleExpression]() { - goto l33 - } + _rules[ruleExpression]() { add(ruleAction5, position) } @@ -1359,23 +1343,17 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { { position92 := position position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleBegin, position92) } - if !_rules[ruleExpression]() { - goto l88 - } + _rules[ruleExpression]() { position93 := position if buffer[position] != '>' { goto l88 } position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleEnd, position93) } { @@ -1392,9 +1370,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { { position96 := position position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleDot, position96) } { @@ -1485,9 +1461,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { position++ } l99: - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleClass, position98) } case '"', '\'': @@ -1546,9 +1520,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l113 } position++ - if !_rules[ruleSpacing]() { - goto l113 - } + _rules[ruleSpacing]() goto l112 l113: position, tokenIndex = position112, tokenIndex112 @@ -1603,9 +1575,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l88 } position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() } l112: add(ruleLiteral, position111) @@ -1614,23 +1584,17 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { { position128 := position position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleOpen, position128) } - if !_rules[ruleExpression]() { - goto l88 - } + _rules[ruleExpression]() { position129 := position if buffer[position] != ')' { goto l88 } position++ - if !_rules[ruleSpacing]() { - goto l88 - } + _rules[ruleSpacing]() add(ruleClose, position129) } default: @@ -1662,9 +1626,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { { position135 := position position++ - if !_rules[ruleSpacing]() { - goto l132 - } + _rules[ruleSpacing]() add(rulePlus, position135) } { @@ -1674,9 +1636,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { { position137 := position position++ - if !_rules[ruleSpacing]() { - goto l132 - } + _rules[ruleSpacing]() add(ruleStar, position137) } { @@ -1689,9 +1649,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l132 } position++ - if !_rules[ruleSpacing]() { - goto l132 - } + _rules[ruleSpacing]() add(ruleQuestion, position139) } { @@ -1756,9 +1714,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { } add(rulePegText, position144) } - if !_rules[ruleSpacing]() { - goto l142 - } + _rules[ruleSpacing]() add(ruleIdentifier, position143) } memoize(11, position142, tokenIndex142, true) @@ -2570,9 +2526,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { position++ } l264: - if !_rules[ruleSpacing]() { - goto l262 - } + _rules[ruleSpacing]() add(ruleLeftArrow, position263) } memoize(23, position262, tokenIndex262, true) @@ -2594,9 +2548,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l266 } position++ - if !_rules[ruleSpacing]() { - goto l266 - } + _rules[ruleSpacing]() add(ruleSlash, position267) } memoize(24, position266, tokenIndex266, true) @@ -2618,9 +2570,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l268 } position++ - if !_rules[ruleSpacing]() { - goto l268 - } + _rules[ruleSpacing]() add(ruleAnd, position269) } memoize(25, position268, tokenIndex268, true) @@ -2642,9 +2592,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l270 } position++ - if !_rules[ruleSpacing]() { - goto l270 - } + _rules[ruleSpacing]() add(ruleNot, position271) } memoize(26, position270, tokenIndex270, true) @@ -2904,9 +2852,7 @@ func (p *Peg[U]) Init(options ...func(*Peg[U]) error) error { goto l309 } position++ - if !_rules[ruleSpacing]() { - goto l309 - } + _rules[ruleSpacing]() add(ruleAction, position310) } memoize(43, position309, tokenIndex309, true) diff --git a/peg_test.go b/peg_test.go index 3bd4f16..adba568 100644 --- a/peg_test.go +++ b/peg_test.go @@ -149,6 +149,145 @@ Expr <- 'CJK' / '汉字' / 'test' } } +func TestCheckAlwaysSucceeds(t *testing.T) { + pegHeader := ` +package main +type Test Peg {} +` + + testCases := []struct { + name string + testRule string + expectedResult bool + }{ + { + name: "Character expression does not always succeed (TypeChar)", + testRule: `A <- 'a'`, + expectedResult: false, + }, + { + name: "Star expression always succeed (TypeStar)", + testRule: `A <- 'a'*`, + expectedResult: true, + }, + { + name: "Dot expression does not always succeed (TypeDot)", + testRule: `A <- .`, + expectedResult: false, + }, + { + name: "Range expression does not always succeed (TypeRange)", + testRule: `A <- [a-z]`, + expectedResult: false, + }, + { + name: "String expression does not always succeed (TypeString)", + testRule: `A <- "abc"`, + expectedResult: false, + }, + { + name: "Predicate expression does not always succeed (TypePredicate)", + testRule: `A <- &{ true } 'a'*`, + expectedResult: false, + }, + { + name: "StateChange expression does not always succeed (TypeStateChange)", + testRule: `A <- !{ false } 'a'*`, + expectedResult: false, + }, + { + name: "Action expression does not always succeed (TypeAction)", + testRule: `A <- { } 'a'*`, + expectedResult: true, + }, + { + name: "Space expression does not always succeed (TypeSpace)", + testRule: `A <- ' '`, + expectedResult: false, + }, + { + name: "PeekFor expression does not always succeed (TypePeekFor)", + testRule: `A <- &'a'`, + expectedResult: false, + }, + { + name: "PeekNot expression does not always succeed (TypePeekNot)", + testRule: `A <- !'a'`, + expectedResult: false, + }, + { + name: "Plus expression does not always succeed (TypePlus)", + testRule: `A <- 'a'+`, + expectedResult: false, + }, + { + name: "Push expression does not always succeed (TypePush)", + testRule: `A <- <'a'*>`, + expectedResult: true, + }, + { + name: "Nil expression always succeeds (TypeNil)", + testRule: `A <- `, + expectedResult: true, + }, + { + name: "Optional expression always succeeds (TypeQuery)", + testRule: `A <- 'b'?`, + expectedResult: true, + }, + { + name: "Nested star expression always succeeds", + testRule: `A <- ('a' / 'b')*`, + expectedResult: true, + }, + { + name: "Sequence with star always succeeds", + testRule: `A <- 'a'* 'b'*`, + expectedResult: true, + }, + { + name: "Sequence with non-star does not always succeed", + testRule: `A <- 'a'* 'b'`, + expectedResult: false, + }, + { + name: "Alternate with star always succeeds", + testRule: `A <- 'a' / 'b'*`, + expectedResult: true, + }, + { + name: "Alternate without star does not always succeed", + testRule: `A <- 'a' / 'b'`, + expectedResult: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + sourceCode := pegHeader + tc.testRule + + p := &Peg[uint32]{Tree: tree.New(false, true, true), Buffer: sourceCode} + _ = p.Init(Size[uint32](1 << 15)) + if err := p.Parse(); err != nil { + t.Fatal(err) + } + p.Execute() + buf := &bytes.Buffer{} + _ = p.Compile("", []string{"peg"}, buf) + + if len(p.Tree.RuleNames) == 0 { + t.Fatal("No rules found in the parsed tree") + } + rule := p.Tree.RuleNames[0] + actualResult := rule.CheckAlwaysSucceeds(p.Tree) + if actualResult != tc.expectedResult { + t.Errorf("Rule [%s]: expected CheckAlwaysSucceeds() = %v, got %v", + tc.name, tc.expectedResult, actualResult) + } + }) + } +} + var pegFileContents = func(files []string) []string { contents := make([]string, len(files)) for i, file := range files { diff --git a/tree/peg.go b/tree/peg.go index de54d7a..fb4ef1b 100644 --- a/tree/peg.go +++ b/tree/peg.go @@ -248,6 +248,56 @@ func (n *node) SetParentMultipleKey(multipleKey bool) { n.parentMultipleKey = multipleKey } +func (n *node) CheckAlwaysSucceeds(t *Tree) bool { + visited := make(map[*node]bool) + return n.checkAlwaysSucceedsRecursion(t, visited) +} + +func (n *node) checkAlwaysSucceedsRecursion(t *Tree, visited map[*node]bool) bool { + switch n.GetType() { + case TypeRule: + if child := n.Front(); child != nil { + return child.checkAlwaysSucceedsRecursion(t, visited) + } + return false + case TypeName: + rule := t.Rules[n.String()] + if rule == nil { + return false + } + if visited[rule] { + return true + } + visited[rule] = true + result := rule.Front().checkAlwaysSucceedsRecursion(t, visited) + visited[rule] = false + return result + case TypeAlternate, TypeUnorderedAlternate: + for element := range n.Iterator() { + if element.checkAlwaysSucceedsRecursion(t, visited) { + return true + } + } + return false + case TypeSequence: + for element := range n.Iterator() { + if !element.checkAlwaysSucceedsRecursion(t, visited) { + return false + } + } + return true + case TypePush, TypeImplicitPush: + if child := n.Front(); child != nil { + return child.checkAlwaysSucceedsRecursion(t, visited) + } + return false + case TypeAction, TypeQuery, TypeStar, TypeNil: + return true + default: + return false + } +} + // Tree is a tree data structure into which a PEG can be parsed. type Tree struct { Rules map[string]*node @@ -941,9 +991,14 @@ func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { compile(element, ko) return } - _print("\n if !_rules[rule%v]() {", name /*rule.GetID()*/) - printJump(ko) - _print("}") + // If the rule always succeeds, do not output the if statement + if rule.CheckAlwaysSucceeds(t) { + _print("\n _rules[rule%v]()", name /*rule.GetID()*/) + } else { + _print("\n if !_rules[rule%v]() {", name /*rule.GetID()*/) + printJump(ko) + _print("}") + } case TypeRange: if n.ParentDetect() { _print("\nposition++") From d807f618d433fa11d50f9356cd04843422205cdc Mon Sep 17 00:00:00 2001 From: Yusuke Asai Date: Mon, 11 Aug 2025 12:25:52 +0900 Subject: [PATCH 2/3] fix: update rule name access in CheckAlwaysSucceeds test --- peg_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peg_test.go b/peg_test.go index adba568..2128f8a 100644 --- a/peg_test.go +++ b/peg_test.go @@ -275,10 +275,10 @@ type Test Peg {} buf := &bytes.Buffer{} _ = p.Compile("", []string{"peg"}, buf) - if len(p.Tree.RuleNames) == 0 { + if len(p.RuleNames) == 0 { t.Fatal("No rules found in the parsed tree") } - rule := p.Tree.RuleNames[0] + rule := p.RuleNames[0] actualResult := rule.CheckAlwaysSucceeds(p.Tree) if actualResult != tc.expectedResult { t.Errorf("Rule [%s]: expected CheckAlwaysSucceeds() = %v, got %v", From 8e91dd8d6842638551742deaf2cd748cda235bdb Mon Sep 17 00:00:00 2001 From: Yusuke Asai Date: Mon, 11 Aug 2025 13:36:02 +0900 Subject: [PATCH 3/3] fix: update test names to reflect correct conditions --- peg_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/peg_test.go b/peg_test.go index 2128f8a..96aae63 100644 --- a/peg_test.go +++ b/peg_test.go @@ -166,7 +166,7 @@ type Test Peg {} expectedResult: false, }, { - name: "Star expression always succeed (TypeStar)", + name: "Star expression always succeeds (TypeStar)", testRule: `A <- 'a'*`, expectedResult: true, }, @@ -196,7 +196,7 @@ type Test Peg {} expectedResult: false, }, { - name: "Action expression does not always succeed (TypeAction)", + name: "Action expression always succeeds (TypeAction)", testRule: `A <- { } 'a'*`, expectedResult: true, }, @@ -221,7 +221,7 @@ type Test Peg {} expectedResult: false, }, { - name: "Push expression does not always succeed (TypePush)", + name: "Push expression always succeeds (TypePush)", testRule: `A <- <'a'*>`, expectedResult: true, },