Skip to content

Commit

Permalink
Fix PGN parsing of games w/ nested variations (#125)
Browse files Browse the repository at this point in the history
* Fix PGN parsing of games w/ nested variations

Existing moveListWithComments() contains a parsing bug when the pgn in
question contains a game with nested variations. In this case the
moveListTokenRe regex doesn't work as expected and results in attempts
to parse an illegal game. To demonstrate the problem this commit adds
a TestScannerWithNested() test case along with a 0013.pgn fixture
which is an export of a lichess study. This commit also fixes the
problem by adding a stripVariations() preprocess step in
moveListWithComments(). I have a subsequent commit coming which adds a
Scanner option to include variations in the game list when processing
a pgn.

* simplified tests

---------

Co-authored-by: Logan Spears <loganjspears@gmail.com>
  • Loading branch information
mikeb26 and notnil authored Nov 24, 2024
1 parent 48213c4 commit 61dffef
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 6 deletions.
39 changes: 39 additions & 0 deletions fixtures/pgns/0013.pgn
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Accepted; Paulsen Formation"]
[Site "https://lichess.org/study/lOO87pCX/BZELlaSA"]
[Result "*"]
[UTCDate "2023.05.26"]
[UTCTime "16:09:59"]
[Variant "Standard"]
[ECO "B21"]
[Opening "Sicilian Defense: Smith-Morra Gambit Accepted, Paulsen Formation"]
[Annotator "https://lichess.org/@/bughouse26"]

1. e4 c5 2. d4 cxd4 3. c3 dxc3 4. Nxc3 Nc6 5. Nf3 e6 6. Bc4 a6 7. O-O b5 { move comment with with braces ? } 8. Bb3 *


[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Declined; Alapin Formation"]
[Site "https://lichess.org/study/lOO87pCX/fC2Vw8Fz"]
[Result "*"]
[UTCDate "2023.05.26"]
[UTCTime "16:13:20"]
[Variant "Standard"]
[ECO "B22"]
[Opening "Sicilian Defense: Alapin Variation, Smith-Morra Declined"]
[Annotator "https://lichess.org/@/bughouse26"]

1. e4 c5 2. d4 cxd4 3. c3 Nf6 4. e5 Nd5 5. Nf3 Nc6 6. cxd4 d6 { chapter 2 main line move comment with parens ( test ) } 7. Bc4 Nb6 (7... dxe5 { chapter 2 variation move comment with unmatched paren ( } 8. dxe5) 8. Bb5 *


[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Declined; Push Variation"]
[Site "https://lichess.org/study/lOO87pCX/XaAI645Y"]
[Result "*"]
[UTCDate "2023.05.26"]
[UTCTime "16:16:43"]
[Variant "Standard"]
[ECO "B21"]
[Opening "Sicilian Defense: Smith-Morra Gambit Declined, Push Variation"]
[Annotator "https://lichess.org/@/bughouse26"]

1. e4 c5 2. d4 cxd4 3. c3 d3 4. Bxd3 Nc6 { chapter 3 main line move comment } 5. Nf3 g6 6. O-O (6. c4 Bg7 { chapter 3 variation 1 comment } 7. O-O) 6... Bg7 7. Qe2 d6 (7... Nf6 8. Rd1 { chapter 3 variation 2 comment } 8... O-O (8... d6 9. e5 dxe5 { chapter 3 nested variation comment } 10. Bxg6 Qc7 { chapter 3 double nested variation comment with parens and moves ( 1. e4 e5 2. Nf3 ) } (10... Qxd1+ 11. Qxd1) 11. Bc2) 9. e5) 8. Rd1 Qc7 9. Na3 a6 10. Bf4 Nf6 11. e5 dxe5 (11... Nh5 12. exd6) (11... Nd5 12. exd6) *


56 changes: 53 additions & 3 deletions pgn.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) {

func decodePGN(pgn string) (*Game, error) {
tagPairs := getTagPairs(pgn)
moveComments, outcome := moveListWithComments(pgn)
moveComments, outcome, err := moveListWithComments(pgn)
if err != nil {
return nil, err
}

gameFuncs := []func(*Game){}
for _, tp := range tagPairs {
if strings.ToLower(tp.Key) == "fen" {
Expand All @@ -179,6 +183,7 @@ func decodePGN(pgn string) (*Game, error) {
g.comments = append(g.comments, move.Comments)
}
g.outcome = outcome

return g, nil
}

Expand Down Expand Up @@ -233,10 +238,15 @@ type moveWithComment struct {

var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=[QRBN])?(?:\+|#)?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`)

func moveListWithComments(pgn string) ([]moveWithComment, Outcome) {
func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) {
pgn = stripTagPairs(pgn)
var outcome Outcome
moves := []moveWithComment{}
// moveListTokenRe doesn't work w/ nested variations
pgn, err := stripVariations(pgn)
if err != nil {
return moves, outcome, err
}

for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) {
move, commentText, outcomeText := match[1], match[2], match[3]
Expand All @@ -257,7 +267,7 @@ func moveListWithComments(pgn string) ([]moveWithComment, Outcome) {
moves = append(moves, moveWithComment{MoveStr: move})
}
}
return moves, outcome
return moves, outcome, nil
}

func stripTagPairs(pgn string) string {
Expand All @@ -271,3 +281,43 @@ func stripTagPairs(pgn string) string {
}
return strings.Join(cp, "\n")
}

func stripVariations(pgn string) (string, error) {
var ret strings.Builder

variationDepth := 0
inCommentSection := false

for _, c := range pgn {
if c == '{' {
if inCommentSection {
return "", fmt.Errorf("chess: pgn decode mismatched { in variation: %v", pgn)
}
inCommentSection = true
} else if c == '}' {
if !inCommentSection {
return "", fmt.Errorf("chess: pgn decode mismatched } in variation: %v", pgn)
}
inCommentSection = false
}
if !inCommentSection && c == '(' {
variationDepth++
continue
}
if !inCommentSection && c == ')' {
if variationDepth <= 0 {
return "", fmt.Errorf("chess: pgn decode mismatched parenthesis in variation: %v", pgn)
}
variationDepth--
continue
}
if variationDepth == 0 {
_, err := ret.WriteRune(c)
if err != nil {
return "", err
}
}
}

return ret.String(), nil
}
14 changes: 11 additions & 3 deletions pgn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,12 @@ func TestWriteComments(t *testing.T) {
}

func TestScanner(t *testing.T) {
for _, fname := range []string{"fixtures/pgns/0006.pgn", "fixtures/pgns/0007.pgn"} {
m := map[string]int{
"fixtures/pgns/0006.pgn": 5,
"fixtures/pgns/0007.pgn": 5,
"fixtures/pgns/0013.pgn": 3,
}
for fname, count := range m {
f, err := os.Open(fname)
if err != nil {
panic(err)
Expand All @@ -146,10 +151,13 @@ func TestScanner(t *testing.T) {
games := []*Game{}
for scanner.Scan() {
game := scanner.Next()
if len(game.Moves()) == 0 {
continue
}
games = append(games, game)
}
if len(games) != 5 {
t.Fatalf(fname+" expected 5 games but got %d", len(games))
if len(games) != count {
t.Fatalf(fname+" expected %d games but got %d", count, len(games))
}
}
}
Expand Down

0 comments on commit 61dffef

Please sign in to comment.