Initial Commit
This commit is contained in:
584
database/perl/lib/TAP/Parser/Grammar.pm
Normal file
584
database/perl/lib/TAP/Parser/Grammar.pm
Normal file
@@ -0,0 +1,584 @@
|
||||
package TAP::Parser::Grammar;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use TAP::Parser::ResultFactory ();
|
||||
use TAP::Parser::YAMLish::Reader ();
|
||||
|
||||
use base 'TAP::Object';
|
||||
|
||||
=head1 NAME
|
||||
|
||||
TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
Version 3.42
|
||||
|
||||
=cut
|
||||
|
||||
our $VERSION = '3.42';
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use TAP::Parser::Grammar;
|
||||
my $grammar = $self->make_grammar({
|
||||
iterator => $tap_parser_iterator,
|
||||
parser => $tap_parser,
|
||||
version => 12,
|
||||
});
|
||||
|
||||
my $result = $grammar->tokenize;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and
|
||||
constructs L<TAP::Parser::Result> subclasses to represent the tokens.
|
||||
|
||||
Do not attempt to use this class directly. It won't make sense. It's mainly
|
||||
here to ensure that we will be able to have pluggable grammars when TAP is
|
||||
expanded at some future date (plus, this stuff was really cluttering the
|
||||
parser).
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
=head2 Class Methods
|
||||
|
||||
=head3 C<new>
|
||||
|
||||
my $grammar = TAP::Parser::Grammar->new({
|
||||
iterator => $iterator,
|
||||
parser => $parser,
|
||||
version => $version,
|
||||
});
|
||||
|
||||
Returns L<TAP::Parser> grammar object that will parse the TAP stream from the
|
||||
specified iterator. Both C<iterator> and C<parser> are required arguments.
|
||||
If C<version> is not set it defaults to C<12> (see L</set_version> for more
|
||||
details).
|
||||
|
||||
=cut
|
||||
|
||||
# new() implementation supplied by TAP::Object
|
||||
sub _initialize {
|
||||
my ( $self, $args ) = @_;
|
||||
$self->{iterator} = $args->{iterator}; # TODO: accessor
|
||||
$self->{iterator} ||= $args->{stream}; # deprecated
|
||||
$self->{parser} = $args->{parser}; # TODO: accessor
|
||||
$self->set_version( $args->{version} || 12 );
|
||||
return $self;
|
||||
}
|
||||
|
||||
my %language_for;
|
||||
|
||||
{
|
||||
|
||||
# XXX the 'not' and 'ok' might be on separate lines in VMS ...
|
||||
my $ok = qr/(?:not )?ok\b/;
|
||||
my $num = qr/\d+/;
|
||||
|
||||
my %v12 = (
|
||||
version => {
|
||||
syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my $version = $1;
|
||||
return $self->_make_version_token( $line, $version, );
|
||||
},
|
||||
},
|
||||
plan => {
|
||||
syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my ( $tests_planned, $tail ) = ( $1, $2 );
|
||||
my $explanation = undef;
|
||||
my $skip = '';
|
||||
|
||||
if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
|
||||
my @todo = split /\s+/, _trim($1);
|
||||
return $self->_make_plan_token(
|
||||
$line, $tests_planned, 'TODO',
|
||||
'', \@todo
|
||||
);
|
||||
}
|
||||
elsif ( 0 == $tests_planned ) {
|
||||
$skip = 'SKIP';
|
||||
|
||||
# If we can't match # SKIP the directive should be undef.
|
||||
($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
|
||||
}
|
||||
elsif ( $tail !~ /^\s*$/ ) {
|
||||
return $self->_make_unknown_token($line);
|
||||
}
|
||||
|
||||
$explanation = '' unless defined $explanation;
|
||||
|
||||
return $self->_make_plan_token(
|
||||
$line, $tests_planned, $skip,
|
||||
$explanation, []
|
||||
);
|
||||
|
||||
},
|
||||
},
|
||||
|
||||
# An optimization to handle the most common test lines without
|
||||
# directives.
|
||||
simple_test => {
|
||||
syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my ( $ok, $num, $desc ) = ( $1, $2, $3 );
|
||||
|
||||
return $self->_make_test_token(
|
||||
$line, $ok, $num,
|
||||
$desc
|
||||
);
|
||||
},
|
||||
},
|
||||
test => {
|
||||
syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my ( $ok, $num, $desc ) = ( $1, $2, $3 );
|
||||
my ( $dir, $explanation ) = ( '', '' );
|
||||
if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
|
||||
\# \s* (SKIP|TODO) \b \s* (.*) $/ix
|
||||
)
|
||||
{
|
||||
( $desc, $dir, $explanation ) = ( $1, $2, $3 );
|
||||
}
|
||||
return $self->_make_test_token(
|
||||
$line, $ok, $num, $desc,
|
||||
$dir, $explanation
|
||||
);
|
||||
},
|
||||
},
|
||||
comment => {
|
||||
syntax => qr/^#(.*)/,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my $comment = $1;
|
||||
return $self->_make_comment_token( $line, $comment );
|
||||
},
|
||||
},
|
||||
bailout => {
|
||||
syntax => qr/^\s*Bail out!\s*(.*)/,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my $explanation = $1;
|
||||
return $self->_make_bailout_token(
|
||||
$line,
|
||||
$explanation
|
||||
);
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
my %v13 = (
|
||||
%v12,
|
||||
plan => {
|
||||
syntax => qr/^1\.\.(\d+)\s*(?:\s*#\s*SKIP\b(.*))?\z/i,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my ( $tests_planned, $explanation ) = ( $1, $2 );
|
||||
my $skip
|
||||
= ( 0 == $tests_planned || defined $explanation )
|
||||
? 'SKIP'
|
||||
: '';
|
||||
$explanation = '' unless defined $explanation;
|
||||
return $self->_make_plan_token(
|
||||
$line, $tests_planned, $skip,
|
||||
$explanation, []
|
||||
);
|
||||
},
|
||||
},
|
||||
yaml => {
|
||||
syntax => qr/^ (\s+) (---.*) $/x,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my ( $pad, $marker ) = ( $1, $2 );
|
||||
return $self->_make_yaml_token( $pad, $marker );
|
||||
},
|
||||
},
|
||||
pragma => {
|
||||
syntax =>
|
||||
qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
|
||||
handler => sub {
|
||||
my ( $self, $line ) = @_;
|
||||
my $pragmas = $1;
|
||||
return $self->_make_pragma_token( $line, $pragmas );
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
%language_for = (
|
||||
'12' => {
|
||||
tokens => \%v12,
|
||||
},
|
||||
'13' => {
|
||||
tokens => \%v13,
|
||||
setup => sub {
|
||||
shift->{iterator}->handle_unicode;
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
|
||||
=head2 Instance Methods
|
||||
|
||||
=head3 C<set_version>
|
||||
|
||||
$grammar->set_version(13);
|
||||
|
||||
Tell the grammar which TAP syntax version to support. The lowest
|
||||
supported version is 12. Although 'TAP version' isn't valid version 12
|
||||
syntax it is accepted so that higher version numbers may be parsed.
|
||||
|
||||
=cut
|
||||
|
||||
sub set_version {
|
||||
my $self = shift;
|
||||
my $version = shift;
|
||||
|
||||
if ( my $language = $language_for{$version} ) {
|
||||
$self->{version} = $version;
|
||||
$self->{tokens} = $language->{tokens};
|
||||
|
||||
if ( my $setup = $language->{setup} ) {
|
||||
$self->$setup();
|
||||
}
|
||||
|
||||
$self->_order_tokens;
|
||||
}
|
||||
else {
|
||||
require Carp;
|
||||
Carp::croak("Unsupported syntax version: $version");
|
||||
}
|
||||
}
|
||||
|
||||
# Optimization to put the most frequent tokens first.
|
||||
sub _order_tokens {
|
||||
my $self = shift;
|
||||
|
||||
my %copy = %{ $self->{tokens} };
|
||||
my @ordered_tokens = grep {defined}
|
||||
map { delete $copy{$_} } qw( simple_test test comment plan );
|
||||
push @ordered_tokens, values %copy;
|
||||
|
||||
$self->{ordered_tokens} = \@ordered_tokens;
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
|
||||
=head3 C<tokenize>
|
||||
|
||||
my $token = $grammar->tokenize;
|
||||
|
||||
This method will return a L<TAP::Parser::Result> object representing the
|
||||
current line of TAP.
|
||||
|
||||
=cut
|
||||
|
||||
sub tokenize {
|
||||
my $self = shift;
|
||||
|
||||
my $line = $self->{iterator}->next;
|
||||
unless ( defined $line ) {
|
||||
delete $self->{parser}; # break circular ref
|
||||
return;
|
||||
}
|
||||
|
||||
my $token;
|
||||
|
||||
for my $token_data ( @{ $self->{ordered_tokens} } ) {
|
||||
if ( $line =~ $token_data->{syntax} ) {
|
||||
my $handler = $token_data->{handler};
|
||||
$token = $self->$handler($line);
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
$token = $self->_make_unknown_token($line) unless $token;
|
||||
|
||||
return $self->{parser}->make_result($token);
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
|
||||
=head3 C<token_types>
|
||||
|
||||
my @types = $grammar->token_types;
|
||||
|
||||
Returns the different types of tokens which this grammar can parse.
|
||||
|
||||
=cut
|
||||
|
||||
sub token_types {
|
||||
my $self = shift;
|
||||
return keys %{ $self->{tokens} };
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
|
||||
=head3 C<syntax_for>
|
||||
|
||||
my $syntax = $grammar->syntax_for($token_type);
|
||||
|
||||
Returns a pre-compiled regular expression which will match a chunk of TAP
|
||||
corresponding to the token type. For example (not that you should really pay
|
||||
attention to this, C<< $grammar->syntax_for('comment') >> will return
|
||||
C<< qr/^#(.*)/ >>.
|
||||
|
||||
=cut
|
||||
|
||||
sub syntax_for {
|
||||
my ( $self, $type ) = @_;
|
||||
return $self->{tokens}->{$type}->{syntax};
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
|
||||
=head3 C<handler_for>
|
||||
|
||||
my $handler = $grammar->handler_for($token_type);
|
||||
|
||||
Returns a code reference which, when passed an appropriate line of TAP,
|
||||
returns the lexed token corresponding to that line. As a result, the basic
|
||||
TAP parsing loop looks similar to the following:
|
||||
|
||||
my @tokens;
|
||||
my $grammar = TAP::Grammar->new;
|
||||
LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
|
||||
for my $type ( $grammar->token_types ) {
|
||||
my $syntax = $grammar->syntax_for($type);
|
||||
if ( $line =~ $syntax ) {
|
||||
my $handler = $grammar->handler_for($type);
|
||||
push @tokens => $grammar->$handler($line);
|
||||
next LINE;
|
||||
}
|
||||
}
|
||||
push @tokens => $grammar->_make_unknown_token($line);
|
||||
}
|
||||
|
||||
=cut
|
||||
|
||||
sub handler_for {
|
||||
my ( $self, $type ) = @_;
|
||||
return $self->{tokens}->{$type}->{handler};
|
||||
}
|
||||
|
||||
sub _make_version_token {
|
||||
my ( $self, $line, $version ) = @_;
|
||||
return {
|
||||
type => 'version',
|
||||
raw => $line,
|
||||
version => $version,
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_plan_token {
|
||||
my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
|
||||
|
||||
if ( $directive eq 'SKIP'
|
||||
&& 0 != $tests_planned
|
||||
&& $self->{version} < 13 )
|
||||
{
|
||||
warn
|
||||
"Specified SKIP directive in plan but more than 0 tests ($line)\n";
|
||||
}
|
||||
|
||||
return {
|
||||
type => 'plan',
|
||||
raw => $line,
|
||||
tests_planned => $tests_planned,
|
||||
directive => $directive,
|
||||
explanation => _trim($explanation),
|
||||
todo_list => $todo,
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_test_token {
|
||||
my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
|
||||
return {
|
||||
ok => $ok,
|
||||
|
||||
# forcing this to be an integer (and not a string) reduces memory
|
||||
# consumption. RT #84939
|
||||
test_num => ( defined $num ? 0 + $num : undef ),
|
||||
description => _trim($desc),
|
||||
directive => ( defined $dir ? uc $dir : '' ),
|
||||
explanation => _trim($explanation),
|
||||
raw => $line,
|
||||
type => 'test',
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_unknown_token {
|
||||
my ( $self, $line ) = @_;
|
||||
return {
|
||||
raw => $line,
|
||||
type => 'unknown',
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_comment_token {
|
||||
my ( $self, $line, $comment ) = @_;
|
||||
return {
|
||||
type => 'comment',
|
||||
raw => $line,
|
||||
comment => _trim($comment)
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_bailout_token {
|
||||
my ( $self, $line, $explanation ) = @_;
|
||||
return {
|
||||
type => 'bailout',
|
||||
raw => $line,
|
||||
bailout => _trim($explanation)
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_yaml_token {
|
||||
my ( $self, $pad, $marker ) = @_;
|
||||
|
||||
my $yaml = TAP::Parser::YAMLish::Reader->new;
|
||||
|
||||
my $iterator = $self->{iterator};
|
||||
|
||||
# Construct a reader that reads from our input stripping leading
|
||||
# spaces from each line.
|
||||
my $leader = length($pad);
|
||||
my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
|
||||
my @extra = ($marker);
|
||||
my $reader = sub {
|
||||
return shift @extra if @extra;
|
||||
my $line = $iterator->next;
|
||||
return $2 if $line =~ $strip;
|
||||
return;
|
||||
};
|
||||
|
||||
my $data = $yaml->read($reader);
|
||||
|
||||
# Reconstitute input. This is convoluted. Maybe we should just
|
||||
# record it on the way in...
|
||||
chomp( my $raw = $yaml->get_raw );
|
||||
$raw =~ s/^/$pad/mg;
|
||||
|
||||
return {
|
||||
type => 'yaml',
|
||||
raw => $raw,
|
||||
data => $data
|
||||
};
|
||||
}
|
||||
|
||||
sub _make_pragma_token {
|
||||
my ( $self, $line, $pragmas ) = @_;
|
||||
return {
|
||||
type => 'pragma',
|
||||
raw => $line,
|
||||
pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
|
||||
};
|
||||
}
|
||||
|
||||
sub _trim {
|
||||
my $data = shift;
|
||||
|
||||
return '' unless defined $data;
|
||||
|
||||
$data =~ s/^\s+//;
|
||||
$data =~ s/\s+$//;
|
||||
return $data;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
=head1 TAP GRAMMAR
|
||||
|
||||
B<NOTE:> This grammar is slightly out of date. There's still some discussion
|
||||
about it and a new one will be provided when we have things better defined.
|
||||
|
||||
The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
|
||||
stream-based protocol. In fact, it's quite legal to have an infinite stream.
|
||||
For the same reason that we don't apply regexes to streams, we're not using a
|
||||
formal grammar here. Instead, we parse the TAP in lines.
|
||||
|
||||
For purposes for forward compatibility, any result which does not match the
|
||||
following grammar is currently referred to as
|
||||
L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
|
||||
|
||||
A formal grammar would look similar to the following:
|
||||
|
||||
(*
|
||||
For the time being, I'm cheating on the EBNF by allowing
|
||||
certain terms to be defined by POSIX character classes by
|
||||
using the following syntax:
|
||||
|
||||
digit ::= [:digit:]
|
||||
|
||||
As far as I am aware, that's not valid EBNF. Sue me. I
|
||||
didn't know how to write "char" otherwise (Unicode issues).
|
||||
Suggestions welcome.
|
||||
*)
|
||||
|
||||
tap ::= version? { comment | unknown } leading_plan lines
|
||||
|
|
||||
lines trailing_plan {comment}
|
||||
|
||||
version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
|
||||
|
||||
leading_plan ::= plan skip_directive? "\n"
|
||||
|
||||
trailing_plan ::= plan "\n"
|
||||
|
||||
plan ::= '1..' nonNegativeInteger
|
||||
|
||||
lines ::= line {line}
|
||||
|
||||
line ::= (comment | test | unknown | bailout ) "\n"
|
||||
|
||||
test ::= status positiveInteger? description? directive?
|
||||
|
||||
status ::= 'not '? 'ok '
|
||||
|
||||
description ::= (character - (digit | '#')) {character - '#'}
|
||||
|
||||
directive ::= todo_directive | skip_directive
|
||||
|
||||
todo_directive ::= hash_mark 'TODO' ' ' {character}
|
||||
|
||||
skip_directive ::= hash_mark 'SKIP' ' ' {character}
|
||||
|
||||
comment ::= hash_mark {character}
|
||||
|
||||
hash_mark ::= '#' {' '}
|
||||
|
||||
bailout ::= 'Bail out!' {character}
|
||||
|
||||
unknown ::= { (character - "\n") }
|
||||
|
||||
(* POSIX character classes and other terminals *)
|
||||
|
||||
digit ::= [:digit:]
|
||||
character ::= ([:print:] - "\n")
|
||||
positiveInteger ::= ( digit - '0' ) {digit}
|
||||
nonNegativeInteger ::= digit {digit}
|
||||
|
||||
=head1 SUBCLASSING
|
||||
|
||||
Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
|
||||
|
||||
If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
|
||||
do is read through the code. There's no easy way of summarizing it here.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<TAP::Object>,
|
||||
L<TAP::Parser>,
|
||||
L<TAP::Parser::Iterator>,
|
||||
L<TAP::Parser::Result>,
|
||||
|
||||
=cut
|
||||
Reference in New Issue
Block a user