Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/lib/TAP/Parser/Grammar.pm
+++ b/database/perl/lib/TAP/Parser/Grammar.pm
@@ -0,0 +1,584 @@
+package TAP::Parser::Grammar;
+
+use strict;
+use warnings;
+
+use TAP::Parser::ResultFactory   ();
+use TAP::Parser::YAMLish::Reader ();
+
+use base 'TAP::Object';
+
+=head1 NAME
+
+TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
+
+=head1 VERSION
+
+Version 3.42
+
+=cut
+
+our $VERSION = '3.42';
+
+=head1 SYNOPSIS
+
+  use TAP::Parser::Grammar;
+  my $grammar = $self->make_grammar({
+    iterator => $tap_parser_iterator,
+    parser   => $tap_parser,
+    version  => 12,
+  });
+
+  my $result = $grammar->tokenize;
+
+=head1 DESCRIPTION
+
+C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and
+constructs L<TAP::Parser::Result> subclasses to represent the tokens.
+
+Do not attempt to use this class directly.  It won't make sense.  It's mainly
+here to ensure that we will be able to have pluggable grammars when TAP is
+expanded at some future date (plus, this stuff was really cluttering the
+parser).
+
+=head1 METHODS
+
+=head2 Class Methods
+
+=head3 C<new>
+
+  my $grammar = TAP::Parser::Grammar->new({
+      iterator => $iterator,
+      parser   => $parser,
+      version  => $version,
+  });
+
+Returns L<TAP::Parser> grammar object that will parse the TAP stream from the
+specified iterator.  Both C<iterator> and C<parser> are required arguments.
+If C<version> is not set it defaults to C<12> (see L</set_version> for more
+details).
+
+=cut
+
+# new() implementation supplied by TAP::Object
+sub _initialize {
+    my ( $self, $args ) = @_;
+    $self->{iterator} = $args->{iterator};    # TODO: accessor
+    $self->{iterator} ||= $args->{stream};    # deprecated
+    $self->{parser} = $args->{parser};        # TODO: accessor
+    $self->set_version( $args->{version} || 12 );
+    return $self;
+}
+
+my %language_for;
+
+{
+
+    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
+    my $ok  = qr/(?:not )?ok\b/;
+    my $num = qr/\d+/;
+
+    my %v12 = (
+        version => {
+            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my $version = $1;
+                return $self->_make_version_token( $line, $version, );
+            },
+        },
+        plan => {
+            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my ( $tests_planned, $tail ) = ( $1, $2 );
+                my $explanation = undef;
+                my $skip        = '';
+
+                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
+                    my @todo = split /\s+/, _trim($1);
+                    return $self->_make_plan_token(
+                        $line, $tests_planned, 'TODO',
+                        '',    \@todo
+                    );
+                }
+                elsif ( 0 == $tests_planned ) {
+                    $skip = 'SKIP';
+
+                    # If we can't match # SKIP the directive should be undef.
+                    ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
+                }
+                elsif ( $tail !~ /^\s*$/ ) {
+                    return $self->_make_unknown_token($line);
+                }
+
+                $explanation = '' unless defined $explanation;
+
+                return $self->_make_plan_token(
+                    $line, $tests_planned, $skip,
+                    $explanation, []
+                );
+
+            },
+        },
+
+        # An optimization to handle the most common test lines without
+        # directives.
+        simple_test => {
+            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
+
+                return $self->_make_test_token(
+                    $line, $ok, $num,
+                    $desc
+                );
+            },
+        },
+        test => {
+            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
+                my ( $dir, $explanation ) = ( '', '' );
+                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
+                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
+                  )
+                {
+                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
+                }
+                return $self->_make_test_token(
+                    $line, $ok, $num, $desc,
+                    $dir,  $explanation
+                );
+            },
+        },
+        comment => {
+            syntax  => qr/^#(.*)/,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my $comment = $1;
+                return $self->_make_comment_token( $line, $comment );
+            },
+        },
+        bailout => {
+            syntax  => qr/^\s*Bail out!\s*(.*)/,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my $explanation = $1;
+                return $self->_make_bailout_token(
+                    $line,
+                    $explanation
+                );
+            },
+        },
+    );
+
+    my %v13 = (
+        %v12,
+        plan => {
+            syntax  => qr/^1\.\.(\d+)\s*(?:\s*#\s*SKIP\b(.*))?\z/i,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my ( $tests_planned, $explanation ) = ( $1, $2 );
+                my $skip
+                  = ( 0 == $tests_planned || defined $explanation )
+                  ? 'SKIP'
+                  : '';
+                $explanation = '' unless defined $explanation;
+                return $self->_make_plan_token(
+                    $line, $tests_planned, $skip,
+                    $explanation, []
+                );
+            },
+        },
+        yaml => {
+            syntax  => qr/^ (\s+) (---.*) $/x,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my ( $pad, $marker ) = ( $1, $2 );
+                return $self->_make_yaml_token( $pad, $marker );
+            },
+        },
+        pragma => {
+            syntax =>
+              qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
+            handler => sub {
+                my ( $self, $line ) = @_;
+                my $pragmas = $1;
+                return $self->_make_pragma_token( $line, $pragmas );
+            },
+        },
+    );
+
+    %language_for = (
+        '12' => {
+            tokens => \%v12,
+        },
+        '13' => {
+            tokens => \%v13,
+            setup  => sub {
+                shift->{iterator}->handle_unicode;
+            },
+        },
+    );
+}
+
+##############################################################################
+
+=head2 Instance Methods
+
+=head3 C<set_version>
+
+  $grammar->set_version(13);
+
+Tell the grammar which TAP syntax version to support. The lowest
+supported version is 12. Although 'TAP version' isn't valid version 12
+syntax it is accepted so that higher version numbers may be parsed.
+
+=cut
+
+sub set_version {
+    my $self    = shift;
+    my $version = shift;
+
+    if ( my $language = $language_for{$version} ) {
+        $self->{version} = $version;
+        $self->{tokens}  = $language->{tokens};
+
+        if ( my $setup = $language->{setup} ) {
+            $self->$setup();
+        }
+
+        $self->_order_tokens;
+    }
+    else {
+        require Carp;
+        Carp::croak("Unsupported syntax version: $version");
+    }
+}
+
+# Optimization to put the most frequent tokens first.
+sub _order_tokens {
+    my $self = shift;
+
+    my %copy = %{ $self->{tokens} };
+    my @ordered_tokens = grep {defined}
+      map { delete $copy{$_} } qw( simple_test test comment plan );
+    push @ordered_tokens, values %copy;
+
+    $self->{ordered_tokens} = \@ordered_tokens;
+}
+
+##############################################################################
+
+=head3 C<tokenize>
+
+  my $token = $grammar->tokenize;
+
+This method will return a L<TAP::Parser::Result> object representing the
+current line of TAP.
+
+=cut
+
+sub tokenize {
+    my $self = shift;
+
+    my $line = $self->{iterator}->next;
+    unless ( defined $line ) {
+        delete $self->{parser};    # break circular ref
+        return;
+    }
+
+    my $token;
+
+    for my $token_data ( @{ $self->{ordered_tokens} } ) {
+        if ( $line =~ $token_data->{syntax} ) {
+            my $handler = $token_data->{handler};
+            $token = $self->$handler($line);
+            last;
+        }
+    }
+
+    $token = $self->_make_unknown_token($line) unless $token;
+
+    return $self->{parser}->make_result($token);
+}
+
+##############################################################################
+
+=head3 C<token_types>
+
+  my @types = $grammar->token_types;
+
+Returns the different types of tokens which this grammar can parse.
+
+=cut
+
+sub token_types {
+    my $self = shift;
+    return keys %{ $self->{tokens} };
+}
+
+##############################################################################
+
+=head3 C<syntax_for>
+
+  my $syntax = $grammar->syntax_for($token_type);
+
+Returns a pre-compiled regular expression which will match a chunk of TAP
+corresponding to the token type.  For example (not that you should really pay
+attention to this, C<< $grammar->syntax_for('comment') >> will return
+C<< qr/^#(.*)/ >>.
+
+=cut
+
+sub syntax_for {
+    my ( $self, $type ) = @_;
+    return $self->{tokens}->{$type}->{syntax};
+}
+
+##############################################################################
+
+=head3 C<handler_for>
+
+  my $handler = $grammar->handler_for($token_type);
+
+Returns a code reference which, when passed an appropriate line of TAP,
+returns the lexed token corresponding to that line.  As a result, the basic
+TAP parsing loop looks similar to the following:
+
+ my @tokens;
+ my $grammar = TAP::Grammar->new;
+ LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
+     for my $type ( $grammar->token_types ) {
+         my $syntax  = $grammar->syntax_for($type);
+         if ( $line =~ $syntax ) {
+             my $handler = $grammar->handler_for($type);
+             push @tokens => $grammar->$handler($line);
+             next LINE;
+         }
+     }
+     push @tokens => $grammar->_make_unknown_token($line);
+ }
+
+=cut
+
+sub handler_for {
+    my ( $self, $type ) = @_;
+    return $self->{tokens}->{$type}->{handler};
+}
+
+sub _make_version_token {
+    my ( $self, $line, $version ) = @_;
+    return {
+        type    => 'version',
+        raw     => $line,
+        version => $version,
+    };
+}
+
+sub _make_plan_token {
+    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
+
+    if (   $directive eq 'SKIP'
+        && 0 != $tests_planned
+        && $self->{version} < 13 )
+    {
+        warn
+          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
+    }
+
+    return {
+        type          => 'plan',
+        raw           => $line,
+        tests_planned => $tests_planned,
+        directive     => $directive,
+        explanation   => _trim($explanation),
+        todo_list     => $todo,
+    };
+}
+
+sub _make_test_token {
+    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
+    return {
+        ok          => $ok,
+
+        # forcing this to be an integer (and not a string) reduces memory
+        # consumption. RT #84939
+        test_num    => ( defined $num ? 0 + $num : undef ),
+        description => _trim($desc),
+        directive   => ( defined $dir ? uc $dir : '' ),
+        explanation => _trim($explanation),
+        raw         => $line,
+        type        => 'test',
+    };
+}
+
+sub _make_unknown_token {
+    my ( $self, $line ) = @_;
+    return {
+        raw  => $line,
+        type => 'unknown',
+    };
+}
+
+sub _make_comment_token {
+    my ( $self, $line, $comment ) = @_;
+    return {
+        type    => 'comment',
+        raw     => $line,
+        comment => _trim($comment)
+    };
+}
+
+sub _make_bailout_token {
+    my ( $self, $line, $explanation ) = @_;
+    return {
+        type    => 'bailout',
+        raw     => $line,
+        bailout => _trim($explanation)
+    };
+}
+
+sub _make_yaml_token {
+    my ( $self, $pad, $marker ) = @_;
+
+    my $yaml = TAP::Parser::YAMLish::Reader->new;
+
+    my $iterator = $self->{iterator};
+
+    # Construct a reader that reads from our input stripping leading
+    # spaces from each line.
+    my $leader = length($pad);
+    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
+    my @extra  = ($marker);
+    my $reader = sub {
+        return shift @extra if @extra;
+        my $line = $iterator->next;
+        return $2 if $line =~ $strip;
+        return;
+    };
+
+    my $data = $yaml->read($reader);
+
+    # Reconstitute input. This is convoluted. Maybe we should just
+    # record it on the way in...
+    chomp( my $raw = $yaml->get_raw );
+    $raw =~ s/^/$pad/mg;
+
+    return {
+        type => 'yaml',
+        raw  => $raw,
+        data => $data
+    };
+}
+
+sub _make_pragma_token {
+    my ( $self, $line, $pragmas ) = @_;
+    return {
+        type    => 'pragma',
+        raw     => $line,
+        pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
+    };
+}
+
+sub _trim {
+    my $data = shift;
+
+    return '' unless defined $data;
+
+    $data =~ s/^\s+//;
+    $data =~ s/\s+$//;
+    return $data;
+}
+
+1;
+
+=head1 TAP GRAMMAR
+
+B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
+about it and a new one will be provided when we have things better defined.
+
+The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
+stream-based protocol.  In fact, it's quite legal to have an infinite stream.
+For the same reason that we don't apply regexes to streams, we're not using a
+formal grammar here.  Instead, we parse the TAP in lines.
+
+For purposes for forward compatibility, any result which does not match the
+following grammar is currently referred to as
+L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.
+
+A formal grammar would look similar to the following:
+
+ (*
+     For the time being, I'm cheating on the EBNF by allowing
+     certain terms to be defined by POSIX character classes by
+     using the following syntax:
+
+       digit ::= [:digit:]
+
+     As far as I am aware, that's not valid EBNF.  Sue me.  I
+     didn't know how to write "char" otherwise (Unicode issues).
+     Suggestions welcome.
+ *)
+
+ tap            ::= version? { comment | unknown } leading_plan lines
+                    |
+                    lines trailing_plan {comment}
+
+ version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
+
+ leading_plan   ::= plan skip_directive? "\n"
+
+ trailing_plan  ::= plan "\n"
+
+ plan           ::= '1..' nonNegativeInteger
+
+ lines          ::= line {line}
+
+ line           ::= (comment | test | unknown | bailout ) "\n"
+
+ test           ::= status positiveInteger? description? directive?
+
+ status         ::= 'not '? 'ok '
+
+ description    ::= (character - (digit | '#')) {character - '#'}
+
+ directive      ::= todo_directive | skip_directive
+
+ todo_directive ::= hash_mark 'TODO' ' ' {character}
+
+ skip_directive ::= hash_mark 'SKIP' ' ' {character}
+
+ comment        ::= hash_mark {character}
+
+ hash_mark      ::= '#' {' '}
+
+ bailout        ::= 'Bail out!' {character}
+
+ unknown        ::= { (character - "\n") }
+
+ (* POSIX character classes and other terminals *)
+
+ digit              ::= [:digit:]
+ character          ::= ([:print:] - "\n")
+ positiveInteger    ::= ( digit - '0' ) {digit}
+ nonNegativeInteger ::= digit {digit}
+
+=head1 SUBCLASSING
+
+Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
+
+If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
+do is read through the code.  There's no easy way of summarizing it here.
+
+=head1 SEE ALSO
+
+L<TAP::Object>,
+L<TAP::Parser>,
+L<TAP::Parser::Iterator>,
+L<TAP::Parser::Result>,
+
+=cut