#!/usr/bin/perl -w #!/usr/local/bin/perl -w ################################################################################ # Program: pgn # Author: Tom Likens # Desc: This script parses and displays the games in a PGN file. # History: # 09-JAN-03 thl 1.00 Initial version # 21-FEB-04 thl 1.01 Only "unique" (i.e. non-redundant) games are # counted. Duplicate games are thrown out. # 31-MAY-04 thl 1.02 All {..} type comments and spurious [...] ID # fields are now stripped out of the game moves. # Output format cleaned up and improved. # 02-JUN-04 thl 1.03 Added support for removing "nested" PGN comments. # The script now keys off of the [Result""] field, # which is *always* guaranteed to be there. # It also alters the formatting depending on the # maximum size of the player's names. # 24-APR-05 thl 1.04 Added the "-g #" command-line option which allows # the user to select 'n' number of games to parse. # This is useful for comparing the progress of an # active match against a previous match's results. # 25-APR-05 thl 1.05 Added an elo difference estimatation if the # PGN file only contains a two player match. # ################################################################################ require 5.6.0; # At least Perl 5.6.0 is required. use warnings; # Warn us about suspicious and questionable perl constructs. use strict; # Force explicit declaration of all variables. # Program revision number (this is altered under CVS check-in/check-out). my $Version = '$Revision: 1.04 $'; #' # <-- This first tick is not used, but without it the color-highlighting # mode of emacs is completely hosed up. # #--------------------------------------------------------------------------- # -- Globals -- #--------------------------------------------------------------------------- my $Prg= $0; # Save the name of the program. my $Filename; my $Debug; my $GameCnt= 0; # If set, this will print out the individual game results. my $Verbose; my $Usage = " Usage: $Prg [-options] Options: -h |-help|--help : prints this usage message -d |--debug : prints out internal variable values (debug use only) -v |--verbose : Shows the results of *EACH* individual game -g N |--games N : This option limits the number of games that will be parsed. This is useful for comparing an on-going Nunn-style match with a previous match. "; main: { my (@file); &initialize(); &parse_the_command_line(); open(FILE, $Filename) || die "Unable to open file $Filename\n"; chomp(@file = ); close(FILE); ## -- Main task processing or subcall go here. my (@names, %won, %lost, %draws); my (@line, @game, %unique_game); my ($extract_moves, $cnt, $unique_games_count) = (0,0,0); my ($event, $site, $date, $round); my ($white, $black, $result, $timecontrol); my ($fen, $setup) = (0,0); my ($moves); # Setup the regexp objects so we can remove nested braces. # The braces we are looking to remove are: "[], (), {}, <>". my ($brace0, $brace1, $brace2, $brace3); # Now setup the various regex objects to support the recursive # regular expression call (note, this is an example of a "dynamic" # regex construct- *very* powerful and a bit confusing!!). $brace0= qr/ \[ (?: (?> [^\[\]]+ ) | (??{$brace0}) )* \]/x; $brace1= qr/ \( (?: (?> [^()]+ ) | (??{$brace1}) )* \)/x; $brace2= qr/ \{ (?: (?> [^{}]+ ) | (??{$brace2}) )* \}/x; $brace3= qr/ \< (?: (?> [^<>]+ ) | (??{$brace3}) )* \>/x; foreach (@file) { @line = split; # Skip empty lines. if (!defined($line[0])) { next; } # eliminate leading and trailing white space. s/^\s+//; s/\s+$//; # Grab the header info. /^\[Event\s+\"(.+)\"\]/ && do { $event = $1; next; }; /^\[Site\s+\"(.+)\"\]/ && do { $site = $1; next; }; /^\[Date\s+\"(.+)\"\]/ && do { $date = $1; next; }; /^\[Round\s+\"(.+)\"\]/ && do { $round = $1; next; }; /^\[White\s+\"(.+)\"\]/ && do { $white= $1; # Strip any leading white space off the white player's name. $white =~ s/^\s+//; $white =~ s/\s+$//; next; }; /^\[Black\s+\"(.+)\"\]/ && do { $black= $1; # Ditto for black. $black =~ s/^\s+//; $black =~ s/\s+$//; next; }; # Key off the result field to reset things. /^\[Result\s+\"(.+)\"\]/ && do { $result= $1; if ($result eq "1/2-1/2") { $result= "1/2"; } $extract_moves = 1; $moves = (); # Clear the game moves. next; }; /^\[TimeControl\s+\"(.+)\"\]/ && do { $timecontrol= $1; next; }; /^\[FEN\s+\"(.+)\"\]/ && do { $fen = $1; next; }; /^\[SetUp\s+\"(.+)\"\]/ && do { $setup= 1; next; }; if ($extract_moves) { $moves .= $_; $moves .= " "; # Add a space to the end of the line. # Are we done extracting moves (key off the result field)? /1\/2-1\/2$/ && do { $extract_moves= 0; }; /1-0$/ && do { $extract_moves= 0; }; /0-1$/ && do { $extract_moves= 0; }; if (!$extract_moves) { # Remove the "$#" tokens Chessbase likes to add. $moves =~ s/\$\d+//sg; # Now strip any PGN comments and/or any [...] ID fields # inadvertently added to the game moves. $moves =~ s/$brace0//sg; $moves =~ s/$brace1//sg; $moves =~ s/$brace2//sg; $moves =~ s/$brace3//sg; # Get rid of the result at the end of the moves. $moves =~ s/1-0//; $moves =~ s/0-1//; $moves =~ s/1\/2-1\/2//; # Replace all "#... move" with "# move". Usually, these will # be left over from comments that were removed. $moves =~ s/(\d+)\.\.\.//sg; # Now replace all multiple spaces with one space for clarity. $moves =~ s/\s+/ /sg; # If we've enabled verbose (debug) mode print the moves. printf "Game %d\n [%s]\n", $cnt+1, $moves if $Debug; # Save the game and it's result $game[$cnt] = sprintf("%22s %s %-16s", $white, $result, $black); # Save the game if it's not a duplicate. if (defined($unique_game{$moves})) { $game[$cnt] .= sprintf(" <== DUPLICATE of game %d", $unique_game{$moves}); } else { $unique_game{$moves} = $cnt+1; $unique_games_count++; push(@names, $white); push(@names, $black); if ($result eq "1/2") { $draws{$white}++; $draws{$black}++; } elsif ($result eq "1-0") { $won{$white}++; $lost{$black}++; } elsif ($result eq "0-1") { $won{$black}++; $lost{$white}++; } } $cnt++; # Did the user set a limit on the number of games to parse? if ($cnt == $GameCnt) { last; } } next; } } # Extract out the unique player names my %seen = (); my @player = sort(grep {!$seen{$_}++} @names); # Find the length of the longest player's name. my ($player_max_width)=(0); foreach (@player) { if (length($_) > $player_max_width) { $player_max_width = length($_); } } $player_max_width += 2; # Add a bit of extra padding. my $line_width= $player_max_width+42; # Print the results of the individual games. if ($Verbose) { print "\n"; printf "%s\n", "="x${line_width}; print " Individual Game Results\n"; printf "%s\n", "="x${line_width}; for (my $i=0; $i <= $#game; $i++) { printf "%3d. %s\n", $i+1, $game[$i]; } } print "\n"; printf "%s\n", "="x${line_width}; printf " Total %s: $cnt ", $GameCnt ? "parsed" : "played"; print "(unique games: $unique_games_count)\n"; print " Note, only unique games are scored.\n"; printf "%s\n", "="x${line_width}; printf "%${player_max_width}s", "Player"; print " Wins Losses Draws Score Percent\n"; printf "%s\n", "="x${line_width}; for (my $i=0; $i <= $#player; $i++) { my $idx = $player[$i]; printf "%${player_max_width}s", $idx; if (!defined($won{$idx})) { $won{$idx} = 0; } if (!defined($lost{$idx})) { $lost{$idx} = 0; } if (!defined($draws{$idx})) { $draws{$idx} = 0; } # And now show the final tally. printf " %3d ", $won{$idx}; printf " %3d ", $lost{$idx}; printf " %3d ", $draws{$idx}; # Calculate the score and the total points for this player. my $score= $won{$idx} + 0.5 * $draws{$idx}; my $total_games= $won{$idx} + $lost{$idx} + $draws{$idx}; my $percent= 100.0 * ($score / $total_games); printf " %4.1f/%-3d (%05.2f%%)", $score, $total_games, $percent; print "\n"; } #------------------------------------------------------------------ # Calculate the ELO difference if this was a match between # two programs. #------------------------------------------------------------------ # Difference in ELO rating between two players is: # # ELO difference = -400 * log10(1 / wp - 1) # # "wp" is winning percentage must be in the range 0.0 < wp < 1.0, # calculated as: wp = points won / points lost #------------------------------------------------------------------ if (1 == $#player) { my $idxA = $player[0]; my $idxB = $player[1]; my $total = $won{$idxA} + $lost{$idxA} + $draws{$idxA}; my $scoreA = $won{$idxA} + 0.5 * $draws{$idxA}; my $scoreB = $won{$idxB} + 0.5 * $draws{$idxB}; my $wp = ($scoreA > $scoreB) ? $scoreA : $scoreB; my $elo = -400.0 * log10(1.0/($wp/$total) - 1.0); printf "\nWinning percentage: %.2f\n", 100.0*$wp/$total if $Debug; printf "\nELO Diff: %.2f\n", $elo; } print "\n"; } ################################################################################ # Subroutine definitions ################################################################################ sub initialize { # Don't buffer STDOUT. $| = 1; # Remove any CVS/RCS dollar signs hanging around the revision number. $Version =~ s/\$//g; } ################################################################################ # Simple routine to generate the base 10 log of a value using the # natural logrithm function and a bit of basic algebra. ################################################################################ sub log10 { my $n = shift; if ($n <= 0) { return 0; } return log($n) / log(10); } ################################################################################ # Grab any command-line arguments and set some of the global variables used. ################################################################################ sub parse_the_command_line { while (@ARGV) { $_ = shift(@ARGV); /^-h|-help|--help/i && do { print $Usage; exit 0; }; /^-d|--debug$/i && do { $Debug = 1; next; }; /-v|--verbose$/i && do { $Verbose = 1; next; }; # This option limits the number of games we look at # in a PGN file. /^-g|--games$/i && do { $_ = shift(@ARGV); $GameCnt = $_; next; }; # Grab the file's name. if (-e $_) { $Filename = $_; } } if (!defined($Filename)) { print "\n"; print "***E : No input file\n"; print $Usage; exit 0; } print "Filename: $Filename\n" if $Debug; }