# Source listing of Perl::PrereqScanner::NotQuiteLite (2415 lines, 75 KiB, Perl)
package Perl::PrereqScanner::NotQuiteLite;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
use Perl::PrereqScanner::NotQuiteLite::Context;
|
|
use Perl::PrereqScanner::NotQuiteLite::Util;
|
|
|
|
# Module version, kept as a quoted string.
our $VERSION = '0.9913';

# Short names of the parsers selected by the ':bundled' pseudo-name.
# _get_parsers() expands each entry to "<class>::Parser::<name>".
our @BUNDLED_PARSERS = qw/
  Aliased AnyMoose Autouse Catalyst ClassAccessor
  ClassAutouse ClassLoad Core Inline KeywordDeclare Later
  Mixin ModuleRuntime MojoBase Moose MooseXDeclare Only
  PackageVariant Plack POE Prefork Superclass Syntax SyntaxCollector
  TestClassMost TestMore TestRequires UniversalVersion Unless
/;

# Short names of the parsers selected by ':default', which is also what
# _get_parsers() uses when no parser list is given at all.
our @DEFAULT_PARSERS = qw/Core Moose/;
|
|
|
|
### Helpers For Debugging
|
|
|
|
# DEBUG is a plain boolean (1 or 0) derived from $ENV{PERL_PSNQL_DEBUG}.
use constant DEBUG => $ENV{PERL_PSNQL_DEBUG} ? 1 : 0;

# DEBUG_RE is meant to switch on for debug levels above 3.  It has to be
# computed from the numeric environment value: DEBUG itself is only ever
# 0 or 1, so testing "DEBUG > 3" could never be true.
use constant DEBUG_RE =>
  (($ENV{PERL_PSNQL_DEBUG} || '') =~ /\A([0-9]+)\z/ && $1 > 3) ? 1 : 0;

# No-op stubs; replaced below with real implementations when DEBUG is on.
sub _debug {}
sub _error {}
sub _dump_stack {}

if (DEBUG) {
  require Data::Dump; Data::Dump->import(qw/dump/);
  no warnings 'redefine';

  # Print the arguments followed by a newline.
  *_debug = sub { print @_, "\n" };

  # Print the arguments followed by a row of 50 asterisks and a newline.
  *_error = sub { print @_, "*" x 50, "\n" };

  # Print $char (a label) and a one-line rendering of $c->{stack}: each
  # entry contributes its element [0], prefixed by "([2])" when element
  # [2] is true.
  *_dump_stack = sub {
    my ($c, $char) = @_;
    my $stacked = join '', map {($_->[2] ? "($_->[2])" : '').$_->[0]} @{$c->{stack}};
    _debug("$char \t\t\t\t stacked: $stacked");
  };
}
|
|
|
|
# Record a match failure in $@ and return nothing.
#
#   $rstr    - reference to the string being scanned
#   $message - text to put in front of the context snippet
#
# $@ is set to $message followed by up to 100 characters of the scanned
# string starting at its current pos().  The bare return yields undef in
# scalar context and an empty list in list context.
sub _match_error {
  my ($rstr, $message) = @_;
  my $context = substr($$rstr, pos($$rstr), 100);
  $@ = $message . $context;
  return;
}
|
|
|
|
### Global Variables To Be Sorted Out Later
|
|
|
|
# Lookup tables and regexes shared by the scanner.

# Currently empty set.
my %unsupported_packages = map {$_ => 1} qw(
);

# Module name => list of keywords associated with that module.
my %sub_keywords = (
  'Function::Parameters' => [qw/fun method/],
  'TryCatch' => [qw/try catch/],
);

# Module name => callback that receives a reference to the source string
# and strips a span from it in place.  Each callback ends with a fixed
# 0 or 1; the meaning of that flag is decided by the caller (not visible
# in this section).
my %filter_modules = (
  tt => sub { ${$_[0]} =~ s|\G.+?no\s*tt\s*;||s; 0; },
  'Text::RewriteRules' => sub { ${$_[0]} =~ s|RULES.+?ENDRULES\n||gs; 1 },
);

my %is_conditional = map {$_ => 1} qw(
  if elsif unless else given when
  for foreach while until
);

my %ends_expr = map {$_ => 1} qw(
  and or xor
  if else elsif unless when default
  for foreach while until
  && || !~ =~ = += -= *= /= **= //= %= ^= |=
  > < >= <= <> <=> cmp ge gt le lt eq ne ? :
);

my %has_sideff = map {$_ => 1} qw(
  and or xor && || //
  if unless when
);

# keywords that allow /regexp/ to follow directly
my %regexp_may_follow = map {$_ => 1} qw(
  and or cmp if elsif unless eq ne
  gt lt ge le for while until grep map not split when
  return
);

# A package-qualified name; both "::" and the old "'" separator are accepted.
my $re_namespace = qr/(?:::|')?(?:[a-zA-Z0-9_]+(?:(?:::|')[a-zA-Z0-9_]+)*)/;

# A (possibly empty) run of characters that cannot open or close a nested
# construct (brackets, quotes, comments, q-like operators, ...).
my $re_nonblock_chars = qr/[^\\\(\)\{\}\[\]\<\>\/"'`#q~,\s]*/;

# What may follow a sigil: a name, ^X, {^NAME}, or a punctuation variable.
my $re_variable = qr/
  (?:$re_namespace)
  | (?:\^[A-Z\]])
  | (?:\{\^[A-Z0-9_]+\})
  | (?:[_"\(\)<\\\&`'\+\-,.\/\%#:=~\|?!\@\*\[\]\^])
/x;

# A POD block: from a "=word" directive to the "=cut" line or end of input.
my $re_pod = qr/(
  =[a-zA-Z]\w*\b
  .*?
  (?:(?:\n)
  =cut\b.*?(?:\n|\z)|\z)
)/sx;

# One or more consecutive "#" comment lines.
my $re_comment = qr/(?:\s*#[^\n]*?\n)*(?:\s*#[^\n]*?)(?:\n|$)/s;

# \G-anchored variants used directly by the tokenizer.
my $g_re_scalar_variable = qr{\G(\$(?:$re_variable))};
my $g_re_hash_shortcut = qr{\G(\{\s*(?:[\+\-]?\w+|(['"])[\w\s]+\2|(?:$re_nonblock_chars))\s*(?<!\$)\})};
my $g_re_prototype = qr{\G(\([^\)]*?\))};
|
|
|
|
# Cache of compiled regexes, keyed by the raw delimiter character.
my %ReStrInDelims;

# Return (and cache) a regex matching the body of a string enclosed in
# $delim: any run of characters that are neither a backslash nor the
# delimiter, with backslash-escaped characters allowed in between.  The
# delimiters themselves are not part of the match.
#
# When the delimiter is a backslash, only doubled backslashes are accepted
# inside the body.
sub _gen_re_str_in_delims {
  my $delim = shift;
  return $ReStrInDelims{$delim} ||= do {
    if ($delim eq '\\') {
      qr/(?:[^\\]*(?:(?:\\\\)[^\\]*)*)/s;
    } else {
      # Escape separately so the cache key stays the raw delimiter.
      my $quoted = quotemeta $delim;
      qr/(?:[^\\$quoted]*(?:\\.[^\\$quoted]*)*)/s;
    }
  };
}

# Precompiled bodies for the three ordinary quoting characters.
my $re_str_in_single_quotes = _gen_re_str_in_delims(q{'});
my $re_str_in_double_quotes = _gen_re_str_in_delims(q{"});
my $re_str_in_backticks = _gen_re_str_in_delims(q{`});
|
|
|
|
# Cache of compiled regexes, keyed by the raw delimiter character.
my %ReStrInDelimsWithEndDelim;

# Return (and cache) a regex matching a string body as produced by
# _gen_re_str_in_delims($delim), immediately followed by the closing
# delimiter itself.
sub _gen_re_str_in_delims_with_end_delim {
  my $delim = shift;
  return $ReStrInDelimsWithEndDelim{$delim} ||= do {
    my $re = _gen_re_str_in_delims($delim);
    qr{$re\Q$delim\E};
  };
}
|
|
|
|
# Cache of [closing delimiter, skip regex] pairs, keyed by opening delimiter.
my %RdelSkip;

# For an opening delimiter, return the list ($rdel, $re_skip):
#
#   $rdel    - the matching closing delimiter: the mirrored bracket for
#              one of [ ( { <, otherwise the same character as $ldel
#   $re_skip - a regex matching one or more characters that are none of
#              the opening delimiter, the closing delimiter or a backslash
#
# Results are cached per opening delimiter.
sub _gen_rdel_and_re_skip {
  my $ldel = shift;
  my $pair = $RdelSkip{$ldel} ||= do {
    (my $rdel = $ldel) =~ tr/[({</])}>/;
    my $re_skip = qr{[^\Q$ldel$rdel\E\\]+};
    [$rdel, $re_skip];
  };
  return @$pair;
}
|
|
|
|
# Cache of compiled regexes, keyed by the opening delimiter only.
my %RegexpShortcut;

# Return (and cache) a regex that matches the rest of a "simple" regexp
# body up to and including its closing delimiter.  The body may contain
# backslash-escaped characters but no brackets of any kind and no
# unescaped delimiter.
#
#   $ldel - opening delimiter
#   $rdel - closing delimiter; when false, $ldel is used for both ends
#
# NOTE: the cache is keyed on $ldel alone, so the first $rdel seen for a
# given $ldel is the one that sticks.
sub _gen_re_regexp_shortcut {
  my ($ldel, $rdel) = @_;
  return $RegexpShortcut{$ldel} ||= do {
    $ldel = quotemeta $ldel;
    $rdel = $rdel ? quotemeta $rdel : $ldel;
    qr{(?:[^\\\(\)\{\}\[\]<>$ldel$rdel]*(?:\\.[^\\\(\)\[\]\{\}<>$ldel$rdel]*)*)$rdel};
  };
}
|
|
|
|
############################
|
|
|
|
# Per-process cache: parser class name => the mapping its register()
# returned (undef when the class has no register method).
my %LOADED;

# Constructor.
#
#   my $scanner = Perl::PrereqScanner::NotQuiteLite->new(%args);
#
# $args{parsers} is resolved to a list of parser classes through
# _get_parsers().  Each class is loaded once and its register(%args)
# result cached in %LOADED.  The per-type ("use", "no", "keyword",
# "method") mappings of all parsers are merged into $self->{_}, each entry
# being [ $parser_class, $handler, ($name for "use"/"no" only) ].  Parsers
# that provide register_fqfn() also contribute "keyword" entries of the
# form [ $parser_class, $handler, $module ], where $module is the name up
# to its last "::".
#
# Dies with "Parser Error: ..." when a parser name is not a plain module
# name, or when a parser cannot be loaded and offers no register method.
# All other %args are stored untouched in the blessed hash.
sub new {
  my ($class, %args) = @_;

  my %mapping;
  my @parsers = $class->_get_parsers($args{parsers});
  for my $parser (@parsers) {
    if (!exists $LOADED{$parser}) {
      # The name is interpolated into a string eval below, so refuse
      # anything that is not a plain module name.
      $parser =~ /\A\w+(?:::\w+)*\z/
        or die "Parser Error: invalid parser name: $parser";
      eval "require $parser; 1";
      if (my $error = $@) {
        # A parser defined inline (no file to require) is still usable.
        $parser->can('register') or die "Parser Error: $error";
      }
      $LOADED{$parser} = $parser->can('register') ? $parser->register(%args) : undef;
    }
    my $parser_mapping = $LOADED{$parser} or next;
    for my $type (qw/use no keyword method/) {
      next unless exists $parser_mapping->{$type};
      for my $name (keys %{$parser_mapping->{$type}}) {
        $mapping{$type}{$name} = [
          $parser,
          $parser_mapping->{$type}{$name},
          (($type eq 'use' or $type eq 'no') ? ($name) : ()),
        ];
      }
    }
    if ($parser->can('register_fqfn')) {
      my $fqfn_mapping = $parser->register_fqfn;
      for my $name (keys %$fqfn_mapping) {
        my ($module) = $name =~ /^(.+)::/;
        $mapping{keyword}{$name} = [
          $parser,
          $fqfn_mapping->{$name},
          $module,
        ];
      }
    }
  }
  $args{_} = \%mapping;

  bless \%args, $class;
}
|
|
|
|
# Expand a user-supplied parser list into fully qualified parser class names.
#
#   my @classes = $class->_get_parsers(\@list);
#
# Each entry of @list (default: ":default") is one of:
#
#   :installed  every submodule of "$class::Parser" found by Module::Find
#   :bundled    the parsers named in @BUNDLED_PARSERS
#   :default    the parsers named in @DEFAULT_PARSERS
#   +Name       "Name" taken verbatim as a class name
#   -Name       exclude "$class::Parser::Name" from the result
#   Name        "$class::Parser::Name", or Name itself when it already
#               starts with "$class::Parser::"
#
# Exclusions apply to the whole result regardless of where they appear in
# the list.  The caller's list is never modified.
sub _get_parsers {
  my ($class, $list) = @_;
  my @parsers;
  my %should_ignore;
  for my $entry (@{$list || [qw/:default/]}) {
    # foreach aliases the elements of the caller's array; strip the +/-
    # prefix from a copy so the same list can be passed in again.
    my $parser = $entry;
    if ($parser eq ':installed') {
      require Module::Find;
      push @parsers, Module::Find::findsubmod("$class\::Parser");
    } elsif ($parser eq ':bundled') {
      push @parsers, map {"$class\::Parser::$_"} @BUNDLED_PARSERS;
    } elsif ($parser eq ':default') {
      push @parsers, map {"$class\::Parser::$_"} @DEFAULT_PARSERS;
    } elsif ($parser =~ s/^\+//) {
      push @parsers, $parser;
    } elsif ($parser =~ s/^\-//) {
      $should_ignore{"$class\::Parser\::$parser"} = 1;
    } elsif ($parser =~ /^\Q$class\E::Parser::/) {
      push @parsers, $parser;
    } else {
      push @parsers, "$class\::Parser\::$parser";
    }
  }
  return grep {!$should_ignore{$_}} @parsers;
}
|
|
|
|
# Scan a file on disk.
#
#   my $context = $scanner->scan_file($path);
#
# Slurps the whole file, remembers its path in $self->{file} and hands the
# contents to scan_string(), whose result is returned.  Croaks when the
# file cannot be opened.  With $self->{verbose} set, a progress line is
# written to STDERR.
sub scan_file {
  my ($self, $file) = @_;
  _debug("START SCANNING $file") if DEBUG;
  print STDERR " Scanning $file\n" if $self->{verbose};
  open my $fh, '<', $file or croak "Can't open $file: $!";
  my $code = do { local $/; <$fh> };
  # Everything has been read; release the handle before the scan starts.
  close $fh;
  $self->{file} = $file;
  return $self->scan_string($code);
}
|
|
|
|
# Scan Perl source held in a string.
#
#   my $context = $scanner->scan_string($source);
#
# Builds a fresh Perl::PrereqScanner::NotQuiteLite::Context from the
# scanner's own attributes, normalizes the source (BOM removal, line
# endings, whitespace), runs _scan() over it and returns the context.
# An undefined $source is treated as the empty string.  Exceptions thrown
# by _scan() are not propagated; they are recorded in $context->{errors}.
sub scan_string {
  my ($self, $string) = @_;

  $string = '' unless defined $string;

  my $c = Perl::PrereqScanner::NotQuiteLite::Context->new(%$self);

  # In quick mode, sources over 30_000 characters are skimmed first.
  # This happens before any normalization below.
  if ($self->{quick}) {
    $c->{file_size} = length $string;
    $self->_skim_string($c, \$string) if $c->{file_size} > 30_000;
  }

  # UTF8 BOM
  if ($string =~ s/\A(\xef\xbb\xbf)//s) {
    utf8::decode($string);
    $c->{decoded} = 1;
  }
  # Other BOMs (TODO: also decode?)
  $string =~ s/\A(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe)//s;

  # normalize: every line-ending flavour becomes this platform's "\n"
  if ("\n" eq "\015") {
    $string =~ s/(?:\015?\012)/\n/gs;
  } elsif ("\n" eq "\012") {
    $string =~ s/(?:\015\012?)/\n/gs;
  } elsif ("\n" eq "\015\012") {
    $string =~ s/(?:\015(?!\012)|(?<!\015)\012)/\n/gs;
  } else {
    $string =~ s/(?:\015\012|\015|\012)/\n/gs;
  }
  # Collapse runs of blanks/tabs to a single space, and drop trailing
  # spaces together with empty lines.
  $string =~ s/[ \t]+/ /g;
  $string =~ s/(?: *\n)+/\n/gs;

  # FIXME
  $c->{stack} = [];
  $c->{errors} = [];
  $c->{callback} = {
    use => \&_use,
    require => \&_require,
    no => \&_no,
  };
  $c->{wants_doc} = 0;

  pos($string) = 0;

  # Bare block so that "redo" can restart the scan: when the context's
  # "redo" flag is set after a pass, the flag, the "ended" marker and the
  # stack are cleared and _scan() runs again from the string's current pos().
  {
    local $@;
    eval { $self->_scan($c, \$string, 0) };
    push @{$c->{errors}}, "Scan Error: $@" if $@;
    if ($c->{redo}) {
      delete $c->{redo};
      delete $c->{ended};
      @{$c->{stack}} = ();
      redo;
    }
  }

  # Anything left on the stack is reported as an error (a dump of the
  # stack), except in quick mode.
  if (@{$c->{stack}} and !$c->{quick}) {
    require Data::Dump;
    push @{$c->{errors}}, Data::Dump::dump($c->{stack});
  }

  $c->remove_inner_packages_from_requirements;
  $c->merge_perl;

  $c;
}
|
|
|
|
# Skim the source for the last statement that looks like a dependency
# declaration and record where it is.
#
#   $self->_skim_string($c, \$string);
#
# Walks the string from its current pos(), looking for "use", "require"
# or "no" followed by a module name.  Once a "use" of a Moose-like module
# (Moo, Moose, Mouse, MooX, MooseX, MouseX, MooseX::Declare) has been
# seen, "extends" and "with" followed by a quoted name are looked for as
# well.
#
# Stores in $c->{last_found_by_skimming} the end offset of the last match
# plus the length of that match (the bound is deliberately generous), or
# 0 when nothing matched.  pos() of the string is restored before
# returning.
sub _skim_string {
  my ($self, $c, $rstr) = @_;
  my $pos = pos($$rstr) || 0;
  my $last_found = 0;
  my $saw_moose;

  # /s is essential: the statements of interest are spread over many
  # lines, and "." must be able to cross newlines to get from one match
  # to the next.
  my $re = qr/\G.*?\b((?:use|require|no)\s+(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*)/s;
  my $re_with_moose = qr/\G.*?\b((?:(?:use|require|no)\s+(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*)|(?:(?:extends|with)\s+(?:["']|q[a-z]*[^a-zA-Z0-9_])(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*))/s;

  # Match in scalar context so every iteration handles exactly one match.
  # A list-context //g would swallow all matches at once and leave only
  # the first one for the Moose check.
  while ($$rstr =~ /$re/gc) {
    my $match = $1;
    $last_found = pos($$rstr) + length $match;
    if (!$saw_moose and $match =~ /^use\s+(?:Mo(?:o|(?:[ou]se))?X?|MooseX::Declare)\b/) {
      $re = $re_with_moose;
      $saw_moose = 1;
    }
  }
  $c->{last_found_by_skimming} = $last_found;
  pos($$rstr) = $pos;
}
|
|
|
|
sub _scan {
|
|
my ($self, $c, $rstr, $parent_scope) = @_;
|
|
|
|
if (@{$c->{stack}} > 90) {
|
|
_error("deep recursion found");
|
|
$c->{ended} = 1;
|
|
}
|
|
|
|
_dump_stack($c, "BEGIN SCOPE") if DEBUG;
|
|
|
|
# found __DATA|END__ somewhere?
|
|
return $c if $c->{ended};
|
|
|
|
my $wants_doc = $c->{wants_doc};
|
|
my $line_top = 1;
|
|
my $waiting_for_a_block;
|
|
|
|
my $current_scope = 0;
|
|
my ($token, $token_desc, $token_type) = ('', '', '');
|
|
my ($prev_token, $prev_token_type) = ('', '');
|
|
my ($stack, $unstack);
|
|
my (@keywords, @tokens, @scope_tokens);
|
|
my $caller_package;
|
|
my $prepend;
|
|
my ($pos, $c1);
|
|
my $prev_pos = 0;
|
|
while(defined($pos = pos($$rstr))) {
|
|
$token = undef;
|
|
|
|
# cache first letter for better performance
|
|
$c1 = substr($$rstr, $pos, 1);
|
|
|
|
if ($line_top) {
|
|
if ($c1 eq '=') {
|
|
if ($$rstr =~ m/\G($re_pod)/gcsx) {
|
|
($token, $token_desc, $token_type) = ($1, 'POD', '') if $wants_doc;
|
|
next;
|
|
}
|
|
}
|
|
}
|
|
if ($c1 eq "\n") {
|
|
pos($$rstr)++;
|
|
$line_top = 1;
|
|
next;
|
|
}
|
|
|
|
$line_top = 0;
|
|
# ignore whitespaces
|
|
if ($c1 eq ' ') {
|
|
pos($$rstr)++;
|
|
next;
|
|
} elsif ($c1 eq '_') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '_' and $$rstr =~ m/\G(__(?:DATA|END)__\b)(?!\s*=>)/gc) {
|
|
if ($wants_doc) {
|
|
($token, $token_desc, $token_type) = ($1, 'END_OF_CODE', '');
|
|
next;
|
|
} else {
|
|
$c->{ended} = 1;
|
|
last;
|
|
}
|
|
}
|
|
} elsif ($c1 eq '#') {
|
|
if ($$rstr =~ m{\G($re_comment)}gcs) {
|
|
($token, $token_desc, $token_type) = ($1, 'COMMENT', '') if $wants_doc;
|
|
$line_top = 1;
|
|
next;
|
|
}
|
|
} elsif ($c1 eq ';') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, ';', ';');
|
|
$current_scope |= F_STATEMENT_END|F_EXPR_END;
|
|
next;
|
|
} elsif ($c1 eq '$') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '#') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '{') {
|
|
if ($$rstr =~ m{\G(\$\#\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '$#{NAME}', 'EXPR');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('$#{', '$#{', 'EXPR');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
next;
|
|
}
|
|
} elsif ($$rstr =~ m{\G(\$\#(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '$#NAME', 'EXPR');
|
|
next;
|
|
} elsif ($prev_token_type eq 'ARROW') {
|
|
my $c3 = substr($$rstr, $pos + 2, 1);
|
|
if ($c3 eq '*') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('$#*', 'VARIABLE', 'VARIABLE');
|
|
$c->add_perl('5.020', '->$#*');
|
|
next;
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('$#', 'SPECIAL_VARIABLE', 'EXPR');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '$') {
|
|
if ($$rstr =~ m{\G(\$(?:\$)+(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '$$NAME', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('$$', 'SPECIAL_VARIABLE', 'EXPR');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '{') {
|
|
if ($$rstr =~ m{\G(\$\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '${NAME}', 'VARIABLE');
|
|
if ($prev_token_type eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
|
|
$token_type = '';
|
|
next;
|
|
}
|
|
} elsif ($$rstr =~ m{\G(\$\{\^[A-Z_]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '${^NAME}', 'VARIABLE');
|
|
if ($token eq '${^CAPTURE}' or $token eq '${^CAPTURE_ALL}') {
|
|
$c->add_perl('5.026', '${^CAPTURE}');
|
|
}
|
|
if ($token eq '${^SAFE_LOCALES}') {
|
|
$c->add_perl('5.028', '${^SAFE_LOCALES}');
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('${', '${', 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '*' and $prev_token_type eq 'ARROW') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('$*', '$*', 'VARIABLE');
|
|
$c->add_perl('5.020', '->$*');
|
|
next;
|
|
} elsif ($c2 eq '+' or $c2 eq '-') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('$'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
|
|
$c->add_perl('5.010', '$'.$c2);
|
|
next;
|
|
} elsif ($$rstr =~ m{$g_re_scalar_variable}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '$NAME', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '@') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '_' and $$rstr =~ m{\G\@_\b}gc) {
|
|
($token, $token_desc, $token_type) = ('@_', 'SPECIAL_VARIABLE', 'VARIABLE');
|
|
next;
|
|
} elsif ($c2 eq '{') {
|
|
if ($$rstr =~ m{\G(\@\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '@{NAME}', 'VARIABLE');
|
|
if ($token eq '@{^CAPTURE}' or $token eq '@{^CAPTURE_ALL}') {
|
|
$c->add_perl('5.026', '@{^CAPTURE}');
|
|
}
|
|
} elsif ($$rstr =~ m{\G(\@\{\^[A-Z_]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '@{^NAME}', 'VARIABLE');
|
|
if ($token eq '@{^CAPTURE}' or $token eq '@{^CAPTURE_ALL}') {
|
|
$c->add_perl('5.026', '@{^CAPTURE}');
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('@{', '@{', 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
}
|
|
if ($prev_token_type eq 'ARROW') {
|
|
$c->add_perl('5.020', '->@{}');
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '$') {
|
|
if ($$rstr =~ m{\G(\@\$(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '@$NAME', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('@$', '@$', 'VARIABLE');
|
|
next;
|
|
}
|
|
} elsif ($prev_token_type eq 'ARROW') {
|
|
# postderef
|
|
if ($c2 eq '*') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('@*', '@*', 'VARIABLE');
|
|
$c->add_perl('5.020', '->@*');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ('@', '@', 'VARIABLE');
|
|
$c->add_perl('5.020', '->@');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '[') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('@[', 'SPECIAL_VARIABLE', 'VARIABLE');
|
|
next;
|
|
} elsif ($c2 eq '+' or $c2 eq '-') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('@'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
|
|
$c->add_perl('5.010', '@'.$c2);
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\@(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '@NAME', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '%') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '{') {
|
|
if ($$rstr =~ m{\G(\%\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '%{NAME}', 'VARIABLE');
|
|
} elsif ($$rstr =~ m{\G(\%\{\^[A-Z_]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '%{^NAME}', 'VARIABLE');
|
|
if ($token eq '%{^CAPTURE}' or $token eq '%{^CAPTURE_ALL}') {
|
|
$c->add_perl('5.026', '%{^CAPTURE}');
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('%{', '%{', 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
}
|
|
if ($prev_token_type eq 'ARROW') {
|
|
$c->add_perl('5.020', '->%{');
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('%=', '%=', 'OP');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\%\$(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '%$NAME', 'VARIABLE');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\%(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '%NAME', 'VARIABLE');
|
|
next;
|
|
} elsif ($prev_token_type eq 'VARIABLE' or $prev_token_type eq 'EXPR') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
} elsif ($prev_token_type eq 'ARROW') {
|
|
if ($c2 eq '*') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('%*', '%*', 'VARIABLE');
|
|
$c->add_perl('5.020', '->%*');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ('%', '%', 'VARIABLE');
|
|
$c->add_perl('5.020', '->%');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '+' or $c2 eq '-') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('%'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
|
|
$c->add_perl('5.010', '%'.$c2);
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '*') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '{') {
|
|
if ($prev_token_type eq 'ARROW') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('*{', '*{', 'VARIABLE');
|
|
$c->add_perl('5.020', '->*{}');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\*\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '*{NAME}', 'VARIABLE');
|
|
if ($prev_token eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
|
|
$token_type = '';
|
|
next;
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('*{', '*{', 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '*') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('**=', '**=', 'OP');
|
|
next;
|
|
} elsif ($prev_token_type eq 'ARROW') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('**', '**', 'VARIABLE');
|
|
$c->add_perl('5.020', '->**');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('**', '**', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('*=', '*=', 'OP');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\*(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '*NAME', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '&') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '&') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('&&', '&&', 'OP');
|
|
next;
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('&=', '&=', 'OP');
|
|
next;
|
|
} elsif ($c2 eq '{') {
|
|
if ($$rstr =~ m{\G(\&\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '&{NAME}', 'EXPR');
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('&{', '&{', 'EXPR');
|
|
$stack = [$token, $pos, 'FUNC'];
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '.') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('&.=', '&.=', 'OP');
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('&.', '&.', 'OP');
|
|
}
|
|
$c->add_perl('5.022', '&.');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\&(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '&NAME', 'EXPR');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\&\$(?:$re_namespace))}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '&$NAME', 'EXPR');
|
|
next;
|
|
} elsif ($prev_token_type eq 'ARROW') {
|
|
if ($c2 eq '*') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('&*', '&*', 'VARIABLE');
|
|
$c->add_perl('5.020', '->&*');
|
|
next;
|
|
}
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '\\') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '{') {
|
|
if ($$rstr =~ m{\G(\\\{[\w\s]+\})}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '\\{NAME}', 'VARIABLE');
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('\\{', '\\{', 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
}
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, '');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '-') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '>') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('->', 'ARROW', 'ARROW');
|
|
if ($prev_token_type eq 'WORD' or $prev_token_type eq 'KEYWORD') {
|
|
$caller_package = $prev_token;
|
|
$current_scope |= F_KEEP_TOKENS;
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '-') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('--', '--', $prev_token_type);
|
|
next;
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('-=', '-=', 'OP');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(\-[ABCMORSTWXbcdefgkloprstuwxz]\b)}gc) {
|
|
($token, $token_desc, $token_type) = ($1, 'FILE_TEST', 'EXPR');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq q{"}) {
|
|
if ($$rstr =~ m{\G(?:\"($re_str_in_double_quotes)\")}gcs) {
|
|
($token, $token_desc, $token_type) = ([$1, q{"}], 'STRING', 'STRING');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq q{'}) {
|
|
if ($$rstr =~ m{\G(?:\'($re_str_in_single_quotes)\')}gcs) {
|
|
($token, $token_desc, $token_type) = ([$1, q{'}], 'STRING', 'STRING');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '`') {
|
|
if ($$rstr =~ m{\G(?:\`($re_str_in_backticks)\`)}gcs) {
|
|
($token, $token_desc, $token_type) = ([$1, q{`}], 'BACKTICK', 'EXPR');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '/') {
|
|
if ($prev_token_type eq '' or $prev_token_type eq 'OP' or ($prev_token_type eq 'KEYWORD' and $regexp_may_follow{$prev_token})) { # undoubtedly regexp
|
|
if (my $regexp = $self->_match_regexp0($c, $rstr, $pos, 'm')) {
|
|
($token, $token_desc, $token_type) = ($regexp, 'REGEXP', 'EXPR');
|
|
next;
|
|
} else {
|
|
# the above may fail
|
|
_debug("REGEXP ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
if (($prev_token_type eq '' or (!($current_scope & F_EXPR) and $prev_token_type eq 'WORD')) or ($prev_token_type eq 'KEYWORD' and @keywords and $prev_token eq $keywords[-1] and $regexp_may_follow{$prev_token})) {
|
|
|
|
if (my $regexp = $self->_match_regexp0($c, $rstr, $pos)) {
|
|
($token, $token_desc, $token_type) = ($regexp, 'REGEXP', 'EXPR');
|
|
next;
|
|
} else {
|
|
# the above may fail
|
|
_debug("REGEXP ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '/') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('//=', '//=', 'OP');
|
|
$c->add_perl('5.010', '//=');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('//', '//', 'OP');
|
|
$c->add_perl('5.010', '//');
|
|
next;
|
|
}
|
|
}
|
|
if ($c2 eq '=') { # this may be a part of /=.../
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('/=', '/=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '{') {
|
|
if ($$rstr =~ m{$g_re_hash_shortcut}gc) {
|
|
($token, $token_desc) = ($1, '{EXPR}');
|
|
if ($current_scope & F_EVAL) {
|
|
$current_scope &= MASK_EVAL;
|
|
$c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
|
|
}
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END;
|
|
next;
|
|
}
|
|
if ($prev_token_type eq 'ARROW' or $prev_token_type eq 'VARIABLE') {
|
|
$token_type = 'VARIABLE';
|
|
next;
|
|
} elsif ($waiting_for_a_block) {
|
|
$waiting_for_a_block = 0;
|
|
if (@keywords and $c->token_expects_block($keywords[0])) {
|
|
my $first_token = $keywords[0];
|
|
$current_scope |= F_EXPR_END;
|
|
if ($c->token_defines_sub($first_token) and $c->has_callback_for(sub => $first_token)) {
|
|
$c->run_callback_for(sub => $first_token, \@tokens);
|
|
$current_scope &= MASK_KEEP_TOKENS;
|
|
@tokens = ();
|
|
}
|
|
}
|
|
next;
|
|
} elsif ($prev_token_type eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
|
|
$token_type = '';
|
|
next;
|
|
} else {
|
|
$token_type = 'EXPR';
|
|
next;
|
|
}
|
|
}
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc) = ($c1, $c1);
|
|
my $stack_owner;
|
|
if (@keywords) {
|
|
for(my $i = @keywords; $i > 0; $i--) {
|
|
my $keyword = $keywords[$i - 1];
|
|
if ($c->token_expects_block($keyword)) {
|
|
$stack_owner = $keyword;
|
|
if (@tokens and $c->token_defines_sub($keyword) and $c->has_callback_for(sub => $keyword)) {
|
|
$c->run_callback_for(sub => $keyword, \@tokens);
|
|
$current_scope &= MASK_KEEP_TOKENS;
|
|
@tokens = ();
|
|
}
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
$stack = [$token, $pos, $stack_owner || ''];
|
|
if ($parent_scope & F_EXPECTS_BRACKET) {
|
|
$current_scope |= F_SCOPE_END|F_STATEMENT_END|F_EXPR_END;
|
|
next;
|
|
}
|
|
if ($prev_token_type eq 'ARROW' or $prev_token_type eq 'VARIABLE') {
|
|
$token_type = 'VARIABLE';
|
|
} elsif ($waiting_for_a_block) {
|
|
$waiting_for_a_block = 0;
|
|
} else {
|
|
$token_type = (($current_scope | $parent_scope) & F_KEEP_TOKENS) ? 'EXPR' : '';
|
|
}
|
|
next;
|
|
} elsif ($c1 eq '[') {
|
|
if ($$rstr =~ m{\G(\[(?:$re_nonblock_chars)\])}gc) {
|
|
($token, $token_desc, $token_type) = ($1, '[EXPR]', 'VARIABLE');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
|
|
$stack = [$token, $pos, 'VARIABLE'];
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '(') {
|
|
my $prototype_re = $c->prototype_re;
|
|
if ($waiting_for_a_block and @keywords and $c->token_defines_sub($keywords[-1]) and $$rstr =~ m{$prototype_re}gc) {
|
|
my $proto = $1;
|
|
if ($proto =~ /^\([\\\$\@\%\&\[\]\*;\+]*\)$/) {
|
|
($token, $token_desc, $token_type) = ($proto, '(PROTOTYPE)', '');
|
|
} else {
|
|
($token, $token_desc, $token_type) = ($proto, '(SIGNATURES)', '');
|
|
$c->add_perl('5.020', 'signatures');
|
|
}
|
|
next;
|
|
} elsif ($$rstr =~ m{\G\(((?:$re_nonblock_chars)(?<!\$))\)}gc) {
|
|
($token, $token_desc, $token_type) = ([[[$1, 'EXPR']]], '()', 'EXPR');
|
|
if ($prev_token_type eq 'KEYWORD' and @keywords and $keywords[-1] eq $prev_token and !$c->token_expects_expr_block($prev_token)) {
|
|
if ($prev_token eq 'eval') {
|
|
$current_scope &= MASK_EVAL;
|
|
$c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
|
|
}
|
|
pop @keywords;
|
|
}
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'EXPR');
|
|
my $stack_owner;
|
|
if (@keywords) {
|
|
for (my $i = @keywords; $i > 0; $i--) {
|
|
my $keyword = $keywords[$i - 1];
|
|
if ($c->token_expects_block($keyword)) {
|
|
$stack_owner = $keyword;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
$stack = [$token, $pos, $stack_owner || ''];
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '}') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, '');
|
|
$unstack = $token;
|
|
$current_scope |= F_STATEMENT_END|F_EXPR_END;
|
|
next;
|
|
} elsif ($c1 eq ']') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, '');
|
|
$unstack = $token;
|
|
next;
|
|
} elsif ($c1 eq ')') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, '');
|
|
$unstack = $token;
|
|
next;
|
|
} elsif ($c1 eq '<') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '<'){
|
|
if ($$rstr =~ m{\G(<<(?:
|
|
\\. |
|
|
\w+ |
|
|
[./-] |
|
|
\[[^\]]*\] |
|
|
\{[^\}]*\} |
|
|
\* |
|
|
\? |
|
|
\~ |
|
|
\$ |
|
|
)*(?<!\-)>>)}gcx) {
|
|
($token, $token_desc, $token_type) = ($1, '<<NAME>>', 'EXPR');
|
|
$c->add_perl('5.022', '<<NAME>>');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G<<~?\s*(?:
|
|
\\?[A-Za-z_][\w]* |
|
|
"(?:[^\\"]*(?:\\.[^\\"]*)*)" |
|
|
'(?:[^\\']*(?:\\.[^\\']*)*)' |
|
|
`(?:[^\\`]*(?:\\.[^\\`]*)*)`
|
|
)}sx) {
|
|
if (my $heredoc = $self->_match_heredoc($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($heredoc, 'HEREDOC', 'EXPR');
|
|
next;
|
|
} else {
|
|
# the above may fail
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('<<=', '<<=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('<<', '<<', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '>') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('<=>', '<=>', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('<=', '<=', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '>') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('<>', '<>', 'OP');
|
|
next;
|
|
} elsif ($$rstr =~ m{\G(<(?:
|
|
\\. |
|
|
\w+ |
|
|
[./-] |
|
|
\[[^\]]*\] |
|
|
\{[^\}]*\} |
|
|
\* |
|
|
\? |
|
|
\~ |
|
|
\$ |
|
|
)*(?<!\-)>)}gcx) {
|
|
($token, $token_desc, $token_type) = ($1, '<NAME>', 'EXPR');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq ':') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq ':') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('::', '::', '');
|
|
next;
|
|
}
|
|
if ($waiting_for_a_block and @keywords and $c->token_defines_sub($keywords[-1])) {
|
|
while($$rstr =~ m{\G\s*(:?\s*[\w]+)}gcs) {
|
|
my $startpos = pos($$rstr);
|
|
if (substr($$rstr, $startpos, 1) eq '(') {
|
|
my @nest = '(';
|
|
pos($$rstr) = $startpos + 1;
|
|
my ($p, $c1);
|
|
while(defined($p = pos($$rstr))) {
|
|
$c1 = substr($$rstr, $p, 1);
|
|
if ($c1 eq '\\') {
|
|
pos($$rstr) = $p + 2;
|
|
next;
|
|
}
|
|
if ($c1 eq ')') {
|
|
pop @nest;
|
|
pos($$rstr) = $p + 1;
|
|
last unless @nest;
|
|
}
|
|
if ($c1 eq '(') {
|
|
push @nest, $c1;
|
|
pos($$rstr) = $p + 1;
|
|
next;
|
|
}
|
|
$$rstr =~ m{\G([^\\()]+)}gc and next;
|
|
}
|
|
}
|
|
}
|
|
$token = substr($$rstr, $pos, pos($$rstr) - $pos);
|
|
($token_desc, $token_type) = ('ATTRIBUTE', '');
|
|
if ($token =~ /^:prototype\(/) {
|
|
$c->add_perl('5.020', ':prototype');
|
|
}
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '=') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '>') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('=>', 'COMMA', 'OP');
|
|
if (@keywords and $prev_token_type eq 'KEYWORD' and $keywords[-1] eq $prev_token) {
|
|
pop @keywords;
|
|
if (!@keywords and ($current_scope & F_KEEP_TOKENS)) {
|
|
$current_scope &= MASK_KEEP_TOKENS;
|
|
@tokens = ();
|
|
}
|
|
}
|
|
next;
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('==', '==', 'OP');
|
|
next;
|
|
} elsif ($c2 eq '~') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('=~', '=~', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '>') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '>') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('>>=', '>>=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('>>', '>>', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('>=', '>=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '+') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '+') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('++=', '++=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('++', '++', $prev_token_type);
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('+=', '+=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '|') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '|') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('||=', '||=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('||', '||', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('|=', '|=', 'OP');
|
|
next;
|
|
} elsif ($c2 eq '.') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('|.=', '|.=', 'OP');
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('|.', '|.', 'OP');
|
|
}
|
|
$c->add_perl('5.022', '|.');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '^') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('^=', '^=', 'OP');
|
|
next;
|
|
} elsif ($c2 eq '.') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '=') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('^.=', '^.=', 'OP');
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('^.', '^.', 'OP');
|
|
}
|
|
$c->add_perl('5.022', '^.');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '!') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '~') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('!~', '!~', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '~') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '~') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('~~', '~~', 'OP');
|
|
$c->add_perl('5.010', '~~');
|
|
next;
|
|
} elsif ($c2 eq '.') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('~.', '~.', 'OP');
|
|
$c->add_perl('5.022', '~.');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq ',') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, 'COMMA', 'OP');
|
|
next;
|
|
} elsif ($c1 eq '?') {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
} elsif ($c1 eq '.') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq '.') {
|
|
if (substr($$rstr, $pos + 2, 1) eq '.') {
|
|
pos($$rstr) = $pos + 3;
|
|
($token, $token_desc, $token_type) = ('...', '...', 'OP');
|
|
$c->add_perl('5.012', '...');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('..', '..', 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq '=') {
|
|
pos($$rstr) = $pos + 2;
|
|
($token, $token_desc, $token_type) = ('.=', '.=', 'OP');
|
|
next;
|
|
} else {
|
|
pos($$rstr) = $pos + 1;
|
|
($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
|
|
next;
|
|
}
|
|
} elsif ($c1 eq '0') {
|
|
my $c2 = substr($$rstr, $pos + 1, 1);
|
|
if ($c2 eq 'x') {
|
|
if ($$rstr =~ m{\G(0x[0-9A-Fa-f_]+)}gc) {
|
|
($token, $token_desc, $token_type) = ($1, 'HEX NUMBER', 'EXPR');
|
|
next;
|
|
}
|
|
} elsif ($c2 eq 'b') {
|
|
if ($$rstr =~ m{\G(0b[01_]+)}gc) {
|
|
($token, $token_desc, $token_type) = ($1, 'BINARY NUMBER', 'EXPR');
|
|
next;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($$rstr =~ m{\G((?:0|[1-9][0-9_]*)(?:\.[0-9][0-9_]*)?)}gc) {
|
|
my $number = $1;
|
|
my $p = pos($$rstr);
|
|
my $n1 = substr($$rstr, $p, 1);
|
|
if ($n1 eq '.') {
|
|
if ($$rstr =~ m{\G((?:\.[0-9_])+)}gc) {
|
|
$number .= $1;
|
|
($token, $token_desc, $token_type) = ($number, 'VERSION_STRING', 'EXPR');
|
|
next;
|
|
} elsif (substr($$rstr, $p, 2) ne '..') {
|
|
$number .= '.';
|
|
pos($$rstr) = $p + 1;
|
|
}
|
|
} elsif ($n1 eq 'E' or $n1 eq 'e') {
|
|
if ($$rstr =~ m{\G([Ee][+-]?[0-9]+)}gc) {
|
|
$number .= $1;
|
|
}
|
|
}
|
|
($token, $token_desc, $token_type) = ($number, 'NUMBER', 'EXPR');
|
|
if ($prepend) {
|
|
$token = "$prepend$token";
|
|
pop @tokens if @tokens and $tokens[-1][0] eq $prepend;
|
|
pop @scope_tokens if @scope_tokens and $scope_tokens[-1][0] eq $prepend;
|
|
}
|
|
next;
|
|
}
|
|
|
|
if ($prev_token_type ne 'ARROW' and ($prev_token_type ne 'KEYWORD' or !$c->token_expects_word($prev_token))) {
|
|
if ($prev_token_type eq 'EXPR' or $prev_token_type eq 'VARIABLE') {
|
|
if ($c1 eq 'x') {
|
|
if ($$rstr =~ m{\G(x\b(?!\s*=>))}gc){
|
|
($token, $token_desc, $token_type) = ($1, $1, '');
|
|
next;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($c1 eq 'q') {
|
|
my $quotelike_re = $c->quotelike_re;
|
|
if ($$rstr =~ m{\G((?:$quotelike_re)\b(?!\s*=>))}gc) {
|
|
if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
|
|
($token, $token_desc, $token_type) = ($quotelike, 'STRING', 'STRING');
|
|
next;
|
|
} else {
|
|
_debug("QUOTELIKE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
} elsif ($$rstr =~ m{\G((?:qw)\b(?!\s*=>))}gc) {
|
|
if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
|
|
($token, $token_desc, $token_type) = ($quotelike, 'QUOTED_WORD_LIST', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("QUOTELIKE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
} elsif ($$rstr =~ m{\G((?:qx)\b(?!\s*=>))}gc) {
|
|
if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
|
|
($token, $token_desc, $token_type) = ($quotelike, 'BACKTICK', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("QUOTELIKE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
} elsif ($$rstr =~ m{\G(qr\b(?!\s*=>))}gc) {
|
|
if (my $regexp = $self->_match_regexp($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($regexp, 'qr', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("QUOTELIKE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
} elsif ($c1 eq 'm') {
|
|
if ($$rstr =~ m{\G(m\b(?!\s*=>))}gc) {
|
|
if (my $regexp = $self->_match_regexp($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($regexp, 'm', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("REGEXP ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
} elsif ($c1 eq 's') {
|
|
if ($$rstr =~ m{\G(s\b(?!\s*=>))}gc) {
|
|
if (my $regexp = $self->_match_substitute($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($regexp, 's', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("SUBSTITUTE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
} elsif ($c1 eq 't') {
|
|
if ($$rstr =~ m{\G(tr\b(?!\s*=>))}gc) {
|
|
if (my $trans = $self->_match_transliterate($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($trans, 'tr', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("TRANSLITERATE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
} elsif ($c1 eq 'y') {
|
|
if ($$rstr =~ m{\G(y\b(?!\s*=>))}gc) {
|
|
if (my $trans = $self->_match_transliterate($c, $rstr)) {
|
|
($token, $token_desc, $token_type) = ($trans, 'y', 'EXPR');
|
|
next;
|
|
} else {
|
|
_debug("TRANSLITERATE ERROR: $@") if DEBUG;
|
|
pos($$rstr) = $pos;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($$rstr =~ m{\G(\w+)}gc) {
|
|
$token = $1;
|
|
if ($prev_token_type eq 'ARROW') {
|
|
$$rstr =~ m{\G((?:(?:::|')\w+)+)\b}gc and $token .= $1;
|
|
($token_desc, $token_type) = ('METHOD', 'METHOD');
|
|
} elsif ($token eq 'CORE') {
|
|
($token_desc, $token_type) = ('NAMESPACE', 'WORD');
|
|
} elsif ($token eq 'format') {
|
|
if ($$rstr =~ m{\G([^=]*?=[ \t]*\n.*?\n\.\n)}gcs) {
|
|
$token .= $1;
|
|
($token_desc, $token_type) = ('FORMAT', '');
|
|
$current_scope |= F_STATEMENT_END|F_EXPR_END;
|
|
next;
|
|
}
|
|
} elsif ($c->token_is_keyword($token) and ($prev_token_type ne 'KEYWORD' or !$c->token_expects_word($prev_token) or ($prev_token eq 'sub' and $token eq 'BEGIN'))) {
|
|
if ($c->token_is_op_keyword($token)) {
|
|
($token_desc, $token_type) = ($token, 'OP');
|
|
} else {
|
|
($token_desc, $token_type) = ('KEYWORD', 'KEYWORD');
|
|
$c->check_new_keyword($token);
|
|
push @keywords, $token unless $token eq 'undef';
|
|
}
|
|
} else {
|
|
if ($c1 eq 'v' and $token =~ /^v(?:0|[1-9][0-9]*)$/) {
|
|
if ($$rstr =~ m{\G((?:\.[0-9][0-9_]*)+)}gc) {
|
|
$token .= $1;
|
|
($token_desc, $token_type) = ('VERSION_STRING', 'EXPR');
|
|
next;
|
|
}
|
|
}
|
|
$$rstr =~ m{\G((?:(?:::|')\w+)+)\b}gc and $token .= $1;
|
|
($token_desc, $token_type) = ('WORD', 'WORD');
|
|
if ($prepend) {
|
|
$token = "$prepend$token";
|
|
pop @tokens if @tokens and $tokens[-1][0] eq $prepend;
|
|
pop @scope_tokens if @scope_tokens and $scope_tokens[-1][0] eq $prepend;
|
|
}
|
|
}
|
|
next;
|
|
}
|
|
|
|
# ignore control characters
|
|
if ($$rstr =~ m{\G([[:cntrl:]]+)}gc) {
|
|
next;
|
|
}
|
|
|
|
if ($$rstr =~ m{\G([[:ascii:]]+)}gc) {
|
|
last if $parent_scope & F_STRING_EVAL;
|
|
_error("UNKNOWN: $1");
|
|
push @{$c->{errors}}, qq{"$1"};
|
|
$token = $1;
|
|
next;
|
|
}
|
|
if ($$rstr =~ m{\G([[:^ascii:]](?:[[:^ascii:]]|\w)*)}gc) {
|
|
if (!$c->{utf8}) {
|
|
last if $parent_scope & F_STRING_EVAL;
|
|
_error("UNICODE?: $1");
|
|
push @{$c->{errors}}, qq{"$1"};
|
|
} else {
|
|
_debug("UTF8: $1") if DEBUG;
|
|
}
|
|
$token = $1;
|
|
next;
|
|
}
|
|
if ($$rstr =~ m{\G(\S+)}gc) {
|
|
last if $parent_scope & F_STRING_EVAL;
|
|
_error("UNEXPECTED: $1");
|
|
push @{$c->{errors}}, qq{"$1"};
|
|
$token = $1;
|
|
}
|
|
|
|
last;
|
|
} continue {
|
|
die "Aborted at $prev_pos" if $prev_pos == pos($$rstr);
|
|
$prev_pos = pos($$rstr);
|
|
|
|
if (defined $token) {
|
|
if (!($current_scope & F_EXPR)) {
|
|
_debug('BEGIN EXPR') if DEBUG;
|
|
$current_scope |= F_EXPR;
|
|
} elsif (($current_scope & F_EXPR) and (($current_scope & F_EXPR_END) or ($ends_expr{$token} and $token_type eq 'KEYWORD' and $prev_token ne ',' and $prev_token ne '=>'))) {
|
|
@keywords = ();
|
|
_debug('END EXPR') if DEBUG;
|
|
$current_scope &= MASK_EXPR_END;
|
|
}
|
|
$prepend = undef;
|
|
|
|
if (DEBUG) {
|
|
my $token_str = ref $token ? Data::Dump::dump($token) : $token;
|
|
_debug("GOT: $token_str ($pos) TYPE: $token_desc ($token_type)".($prev_token_type ? " PREV: $prev_token_type" : '').(@keywords ? " KEYWORD: @keywords" : '').(($current_scope | $parent_scope) & F_EVAL ? ' EVAL' : '').(($current_scope | $parent_scope) & F_KEEP_TOKENS ? ' KEEP' : ''));
|
|
}
|
|
|
|
if ($parent_scope & F_KEEP_TOKENS) {
|
|
push @scope_tokens, [$token, $token_desc];
|
|
if ($token eq '-' or $token eq '+') {
|
|
$prepend = $token;
|
|
}
|
|
}
|
|
if (!($current_scope & F_KEEP_TOKENS) and (exists $c->{callback}{$token} or exists $c->{keyword}{$token} or exists $c->{sub}{$token}) and $token_type ne 'METHOD' and !$c->token_expects_word($prev_token)) {
|
|
$current_scope |= F_KEEP_TOKENS;
|
|
}
|
|
if ($c->token_expects_block($token)) {
|
|
$waiting_for_a_block = 1;
|
|
}
|
|
if ($current_scope & F_EVAL or ($parent_scope & F_EVAL and (!@{$c->{stack}} or $c->{stack}[-1][0] ne '{'))) {
|
|
if ($token_type eq 'STRING') {
|
|
if ($token->[0] =~ /\b(?:(?:use|no)\s+[A-Za-z]|require\s+(?:q[qw]?.|['"])?[A-Za-z])/) {
|
|
my $eval_string = $token->[0];
|
|
if (defined $eval_string and $eval_string ne '') {
|
|
$eval_string =~ s/\\(.)/$1/g;
|
|
pos($eval_string) = 0;
|
|
$c->{eval} = 1;
|
|
my $saved_stack = $c->{stack};
|
|
$c->{stack} = [];
|
|
eval { $self->_scan($c, \$eval_string, (
|
|
($current_scope | $parent_scope | F_STRING_EVAL) &
|
|
F_RESCAN
|
|
))};
|
|
$c->{stack} = $saved_stack;
|
|
}
|
|
}
|
|
$current_scope &= MASK_EVAL;
|
|
} elsif ($token_desc eq 'HEREDOC') {
|
|
if ($token->[0] =~ /\b(?:use|require|no)\s+[A-Za-z]/) {
|
|
my $eval_string = $token->[0];
|
|
if (defined $eval_string and $eval_string ne '') {
|
|
$eval_string =~ s/\\(.)/$1/g;
|
|
pos($eval_string) = 0;
|
|
$c->{eval} = 1;
|
|
my $saved_stack = $c->{stack};
|
|
$c->{stack} = [];
|
|
eval { $self->_scan($c, \$eval_string, (
|
|
($current_scope | $parent_scope | F_STRING_EVAL) &
|
|
F_RESCAN
|
|
))};
|
|
$c->{stack} = $saved_stack;
|
|
}
|
|
}
|
|
$current_scope &= MASK_EVAL;
|
|
} elsif ($token_type eq 'VARIABLE') {
|
|
$current_scope &= MASK_EVAL;
|
|
}
|
|
$c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
|
|
}
|
|
if ($token eq 'eval') {
|
|
$current_scope |= F_EVAL;
|
|
$c->{eval} = 1;
|
|
}
|
|
|
|
if ($current_scope & F_KEEP_TOKENS) {
|
|
push @tokens, [$token, $token_desc];
|
|
if ($token eq '-' or $token eq '+') {
|
|
$prepend = $token;
|
|
}
|
|
if ($token_type eq 'KEYWORD' and $has_sideff{$token}) {
|
|
$current_scope |= F_SIDEFF;
|
|
}
|
|
}
|
|
if ($stack) {
|
|
push @{$c->{stack}}, $stack;
|
|
_dump_stack($c, $stack->[0]) if DEBUG;
|
|
my $child_scope = $current_scope | $parent_scope;
|
|
if ($token eq '{' and $is_conditional{$stack->[2]}) {
|
|
$child_scope |= F_CONDITIONAL
|
|
}
|
|
my $scanned_tokens = $self->_scan($c, $rstr, (
|
|
$child_scope & F_RESCAN
|
|
));
|
|
if ($token eq '{' and $current_scope & F_EVAL) {
|
|
$current_scope &= MASK_EVAL;
|
|
$c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
|
|
}
|
|
if ($current_scope & F_KEEP_TOKENS) {
|
|
my $start = pop @tokens || '';
|
|
my $end = pop @$scanned_tokens || '';
|
|
push @tokens, [$scanned_tokens, "$start->[0]$end->[0]"];
|
|
} elsif ($parent_scope & F_KEEP_TOKENS) {
|
|
my $start = pop @scope_tokens || '';
|
|
my $end = pop @$scanned_tokens || '';
|
|
push @scope_tokens, [$scanned_tokens, "$start->[0]$end->[0]"];
|
|
}
|
|
|
|
if ($stack->[0] eq '(' and $prev_token_type eq 'KEYWORD' and @keywords and $keywords[-1] eq $prev_token and !$c->token_expects_expr_block($prev_token)) {
|
|
pop @keywords;
|
|
}
|
|
|
|
if ($stack->[0] eq '{' and @keywords and $c->token_expects_block($keywords[0]) and !$c->token_expects_block_list($keywords[-1])) {
|
|
$current_scope |= F_STATEMENT_END unless @tokens and ($c->token_defines_sub($keywords[-1]) or $keywords[-1] eq 'eval');
|
|
}
|
|
$stack = undef;
|
|
}
|
|
if ($current_scope & F_STATEMENT_END) {
|
|
if (($current_scope & F_KEEP_TOKENS) and @tokens) {
|
|
my $first_token = $tokens[0][0];
|
|
if ($first_token eq '->') {
|
|
$first_token = $tokens[1][0];
|
|
# ignore ->use and ->no
|
|
# ->require may be from UNIVERSAL::require
|
|
if ($first_token eq 'use' or $first_token eq 'no') {
|
|
$first_token = '';
|
|
}
|
|
}
|
|
my $cond = (($current_scope | $parent_scope) & (F_CONDITIONAL|F_SIDEFF)) ? 1 : 0;
|
|
if (exists $c->{callback}{$first_token}) {
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$c->{callback}{$first_token}->($c, $rstr, \@tokens);
|
|
|
|
if ($c->{found_unsupported_package} and !$c->{quick}) {
|
|
my $unsupported = $c->{found_unsupported_package};
|
|
$c->{quick} = 1;
|
|
$self->_skim_string($c, $rstr);
|
|
warn "Unsupported package '$unsupported' is found. Result may be incorrect.\n";
|
|
}
|
|
}
|
|
if (exists $c->{keyword}{$first_token}) {
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$tokens[0][1] = 'KEYWORD';
|
|
$c->run_callback_for(keyword => $first_token, \@tokens);
|
|
}
|
|
if (exists $c->{method}{$first_token} and $caller_package) {
|
|
unshift @tokens, [$caller_package, 'WORD'];
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$c->run_callback_for(method => $first_token, \@tokens);
|
|
}
|
|
if ($current_scope & F_SIDEFF) {
|
|
$current_scope &= MASK_SIDEFF;
|
|
while(my $token = shift @tokens) {
|
|
last if $has_sideff{$token->[0]};
|
|
}
|
|
$current_scope &= F_SIDEFF if grep {$has_sideff{$_->[0]}} @tokens;
|
|
if (@tokens) {
|
|
$first_token = $tokens[0][0];
|
|
$cond = (($current_scope | $parent_scope) & (F_CONDITIONAL|F_SIDEFF)) ? 1 : 0;
|
|
if (exists $c->{callback}{$first_token}) {
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$c->{callback}{$first_token}->($c, $rstr, \@tokens);
|
|
}
|
|
if (exists $c->{keyword}{$first_token}) {
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$tokens[0][1] = 'KEYWORD';
|
|
$c->run_callback_for(keyword => $first_token, \@tokens);
|
|
}
|
|
if (exists $c->{method}{$first_token} and $caller_package) {
|
|
unshift @tokens, [$caller_package, 'WORD'];
|
|
$c->{current_scope} = \$current_scope;
|
|
$c->{cond} = $cond;
|
|
$c->run_callback_for(method => $first_token, \@tokens);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@tokens = ();
|
|
@keywords = ();
|
|
$current_scope &= MASK_STATEMENT_END;
|
|
$caller_package = undef;
|
|
$token = $token_type = '';
|
|
_debug('END SENTENSE') if DEBUG;
|
|
}
|
|
if ($unstack and @{$c->{stack}}) {
|
|
my $stacked = pop @{$c->{stack}};
|
|
my $stacked_type = substr($stacked->[0], -1);
|
|
if (
|
|
($unstack eq '}' and $stacked_type ne '{') or
|
|
($unstack eq ']' and $stacked_type ne '[') or
|
|
($unstack eq ')' and $stacked_type ne '(')
|
|
) {
|
|
my $prev_pos = $stacked->[1] || 0;
|
|
die "mismatch $stacked_type $unstack\n" .
|
|
substr($$rstr, $prev_pos, pos($$rstr) - $prev_pos);
|
|
}
|
|
_dump_stack($c, $unstack) if DEBUG;
|
|
$current_scope |= F_SCOPE_END;
|
|
$unstack = undef;
|
|
}
|
|
|
|
last if $current_scope & F_SCOPE_END;
|
|
last if $c->{ended};
|
|
last if $c->{last_found_by_skimming} and $c->{last_found_by_skimming} < pos($$rstr);
|
|
|
|
($prev_token, $prev_token_type) = ($token, $token_type);
|
|
}
|
|
|
|
if (@{$c->{errors}} and !($parent_scope & F_STRING_EVAL)) {
|
|
my $rest = substr($$rstr, pos($$rstr));
|
|
_error("REST:\n\n".$rest) if $rest;
|
|
last;
|
|
}
|
|
}
|
|
|
|
if (@tokens) {
|
|
if (my $first_token = $tokens[0][0]) {
|
|
if (exists $c->{callback}{$first_token}) {
|
|
$c->{callback}{$first_token}->($c, $rstr, \@tokens);
|
|
}
|
|
if (exists $c->{keyword}{$first_token}) {
|
|
$tokens[0][1] = 'KEYWORD';
|
|
$c->run_callback_for(keyword => $first_token, \@tokens);
|
|
}
|
|
}
|
|
}
|
|
|
|
_dump_stack($c, "END SCOPE") if DEBUG;
|
|
|
|
\@scope_tokens;
|
|
}
|
|
|
|
# Match the delimited body of a quote-like operator (q, qq, qw, qx, ...).
#
# pos($$rstr) is expected to sit right after the operator name $op.
#
# Returns an array reference [ $body, $op ], where $body is the text between
# the delimiters (the delimiters themselves excluded); pos($$rstr) is left
# just past the closing delimiter.
#
# On failure nothing is returned (undef in scalar context) and the reason is
# stored in $@ through _match_error(), so that the caller's
# "QUOTELIKE ERROR: $@" debug output always describes the current failure
# instead of whatever happened to be left in $@.
sub _match_quotelike {
    my ($self, $c, $rstr, $op) = @_;

    # Skip an optional comment and whitespace before the delimiter. The
    # comment must be preceded by whitespace: a '#' that comes directly
    # after the operator is the delimiter itself.
    $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

    unless ($$rstr =~ m/\G(\S)/gc) {
        return _match_error($rstr, "No block delimiter found after $op");
    }
    my $ldel     = $1;
    my $startpos = pos($$rstr);

    if ($ldel =~ /[[(<{]/) {
        # Bracketing delimiters may nest, so count the open brackets.
        my ($rdel, $re_skip) = _gen_rdel_and_re_skip($ldel);
        my $depth = 1;
        while (defined(my $p = pos($$rstr))) {
            my $c1 = substr($$rstr, $p, 1);
            if ($c1 eq '\\') {
                # an escaped character never opens or closes a bracket
                pos($$rstr) = $p + 2;
                next;
            }
            if ($c1 eq $ldel) {
                pos($$rstr) = $p + 1;
                $depth++;
                next;
            }
            if ($c1 eq $rdel) {
                pos($$rstr) = $p + 1;
                last unless --$depth;
                next;
            }
            next if $$rstr =~ m{\G$re_skip}gc;
            last;    # nothing more to consume (end of the string)
        }
        if ($depth) {
            return _match_error($rstr, "Closing delimiter '$rdel' was not found for $op: ");
        }
    }
    else {
        my $re = _gen_re_str_in_delims_with_end_delim($ldel);
        unless ($$rstr =~ /\G$re/gcs) {
            return _match_error($rstr, "Closing delimiter '$ldel' was not found for $op: ");
        }
    }

    my $endpos = pos($$rstr);

    # "- 1" drops the closing delimiter from the captured body
    return [substr($$rstr, $startpos, $endpos - $startpos - 1), $op];
}
|
|
|
|
# Match a bare /.../ regexp whose opening slash is located at $startpos.
#
# Returns the source text from the opening slash through the trailing
# modifiers.  Returns nothing, with the reason stored in $@ by
# _match_error(), when no closing slash is found or when the match spans
# several lines without the /x modifier.
sub _match_regexp0 { # //
    my ($self, $c, $rstr, $startpos, $token_type) = @_;

    # continue right after the opening slash
    pos($$rstr) = $startpos + 1;

    my $re_shortcut = _gen_re_regexp_shortcut('/');
    unless ($$rstr =~ m{\G$re_shortcut}gcs) {
        # The shortcut did not apply; fall back to the full scanner.  An
        # empty operator makes _scan_re watch for the enclosing bracket's
        # closing delimiter as well.
        my $op      = $token_type ? 'm' : '';
        my $scanned = $self->_scan_re($c, $rstr, '/', '/', $op);
        unless (defined $scanned) {
            return _match_error($rstr, "Closing delimiter was not found: $@");
        }
    }

    $$rstr =~ m/\G([msixpodualgc]*)/gc;
    my $modifiers = $1;

    my $regexp = substr($$rstr, $startpos, pos($$rstr) - $startpos);

    if ($regexp =~ /\n/s) {
        unless ($modifiers =~ /x/) {
            return _match_error($rstr, "multiline without x");
        }
    }
    return $regexp;
}
|
|
|
|
# Match the delimited part of an m or qr operator, starting at the current
# pos($$rstr) (0 if unset).
#
# Returns the source text from that starting position through the trailing
# modifiers, or nothing (with $@ set by _match_error()) when no delimiter or
# no closing delimiter can be found.
sub _match_regexp {
    my ($self, $c, $rstr) = @_;
    my $startpos = pos($$rstr) || 0;

    # Skip an optional comment and whitespace before the delimiter. The
    # comment must be preceded by whitespace: a '#' directly after the
    # operator is the delimiter itself.
    $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

    $$rstr =~ m/\G(\S)/gc
        or return _match_error($rstr, "No block delimiter found");
    my $ldel = $1;

    # an opening bracket is closed by its counterpart, anything else by itself
    (my $rdel = $ldel) =~ tr/[({</])}>/;

    my $re_shortcut = _gen_re_regexp_shortcut($ldel, $rdel);
    unless ($$rstr =~ m{\G$re_shortcut}gcs) {
        defined($self->_scan_re($c, $rstr, $ldel, $rdel, 'm/qr'))
            or return _match_error($rstr, "Closing delimiter was not found: $@");
    }

    # strictly speaking, qr// doesn't support gc.
    $$rstr =~ m/\G[msixpodualgc]*/gc;

    return substr($$rstr, $startpos, pos($$rstr) - $startpos);
}
|
|
|
|
# Match both delimited parts of an s operator, starting at the current
# pos($$rstr) (0 if unset).
#
# Returns the source text from that starting position through the trailing
# modifiers.  Returns nothing when the pattern or the replacement part
# cannot be matched; for a missing delimiter or an unterminated pattern the
# reason is stored in $@ by _match_error().
sub _match_substitute {
    my ($self, $c, $rstr) = @_;
    my $startpos = pos($$rstr) || 0;

    # Skip an optional comment and whitespace before the delimiter. The
    # comment must be preceded by whitespace: a '#' directly after the
    # operator is the delimiter itself.
    $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

    $$rstr =~ m/\G(\S)/gc
        or return _match_error($rstr, "No block delimiter found");
    my $ldel1 = $1;

    # an opening bracket is closed by its counterpart, anything else by itself
    (my $rdel1 = $ldel1) =~ tr/[({</])}>/;

    # Pattern part: the shortcut is never tried for a backslash delimiter.
    my $re_shortcut  = _gen_re_regexp_shortcut($ldel1, $rdel1);
    my $pattern_done = ($ldel1 ne '\\' and $$rstr =~ m{\G$re_shortcut}gcs);
    unless ($pattern_done) {
        defined($self->_scan_re($c, $rstr, $ldel1, $rdel1, 's'))
            or return _match_error($rstr, "Closing delimiter was not found: $@");
    }

    # Replacement part.
    defined($self->_scan_re2($c, $rstr, $ldel1, 's')) or return;

    $$rstr =~ m/\G[msixpodualgcer]*/gc;

    return substr($$rstr, $startpos, pos($$rstr) - $startpos);
}
|
|
|
|
# Match both delimited parts of a tr or y operator, starting at the current
# pos($$rstr) (0 if unset).
#
# Returns the source text from that starting position through the trailing
# modifiers (c, d, s, r).  Returns nothing when either part cannot be
# matched; $@ is set by _match_error() only when a delimiter is missing.
sub _match_transliterate {
    my ($self, $c, $rstr) = @_;
    my $startpos = pos($$rstr) || 0;

    # Skip an optional comment and whitespace before the delimiter. The
    # comment must be preceded by whitespace: a '#' directly after the
    # operator is the delimiter itself.
    $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

    $$rstr =~ m/\G(\S)/gc
        or return _match_error($rstr, "No block delimiter found");
    my $open1 = $1;

    # First part (search list). An opening bracket is closed by its
    # counterpart; any other delimiter closes itself (tr leaves it alone).
    (my $close1 = $open1) =~ tr/[({</])}>/;
    my $re1 = _gen_re_str_in_delims_with_end_delim($close1);
    $$rstr =~ /\G$re1/gcs or return;

    # With a plain delimiter the second part reuses it; with brackets the
    # second part brings its own opening delimiter, possibly after a
    # comment and whitespace.
    my $open2 = $open1;
    if ($open1 =~ /[[(<{]/) {
        $$rstr =~ /\G(?:$re_comment)/gcs;
        $$rstr =~ /\G\s*(\S)/gc
            or return _match_error($rstr, "Missing second block");
        $open2 = $1;
    }

    # Second part (replacement list).
    (my $close2 = $open2) =~ tr/[({</])}>/;
    my $re2 = _gen_re_str_in_delims_with_end_delim($close2);
    $$rstr =~ /\G$re2/gcs or return;

    $$rstr =~ m/\G[cdsr]*/gc;

    return substr($$rstr, $startpos, pos($$rstr) - $startpos);
}
|
|
|
|
# Match a here-document introduced at the current pos($$rstr) (0 if unset).
#
# On success returns an array reference of three strings:
#   [0] the here-document body,
#   [1] the introducer as written in the source ("<<", optional "~", label),
#   [2] the terminator line.
# The body and the terminator line are removed from $$rstr, and pos($$rstr)
# is put right after the introducer so that the rest of that line is scanned
# normally.  When "<<~" is used, perl 5.026 is recorded as a requirement.
#
# Returns nothing when no label can be recognised, or (with $@ set by
# _match_error()) when the terminator is missing.
sub _match_heredoc {
    my ($self, $c, $rstr) = @_;

    my $startpos = pos($$rstr) || 0;

    $$rstr =~ m{\G(?:<<(~)?\s*)}gc;
    # with "<<~" the terminator may be preceded by whitespace
    my $indent = $1 ? "\\s*" : "";

    my $label;
    if ($$rstr =~ m{\G\\?([A-Za-z_]\w*)}gc) {
        $label = $1;
    }
    elsif ($$rstr =~ m{
          \G ' ($re_str_in_single_quotes) '
        | \G " ($re_str_in_double_quotes) "
        | \G ` ($re_str_in_backticks) `
    }gcsx) {
        $label = $+;
    }
    else {
        return;
    }
    $label =~ s/\\(.)/$1/g;

    my $extrapos = pos($$rstr);

    # The body begins on the line after the introducer.
    $$rstr =~ m{\G.*\n}gc;
    my $body_start = pos($$rstr);

    # Step back onto the newline so that the terminator search below also
    # works when the body is empty.
    pos($$rstr) = $body_start - 1;

    unless ($$rstr =~ m{\G.*?\n$indent(?=\Q$label\E\n)}gcs) {
        return _match_error($rstr, qq{Missing here doc terminator ('$label')});
    }
    my $body_end = pos($$rstr);

    $$rstr =~ m{\G\Q$label\E\n}gc;
    my $terminator_end = pos($$rstr);

    my $heredoc = [
        substr($$rstr, $body_start, $body_end - $body_start),
        substr($$rstr, $startpos, $extrapos - $startpos),
        substr($$rstr, $body_end, $terminator_end - $body_end),
    ];

    # Cut the body and the terminator out of the source and resume scanning
    # right after the introducer.
    substr($$rstr, $body_start, $terminator_end - $body_start, '');
    pos($$rstr) = $extrapos;

    $c->add_perl('5.026', '<<~') if $indent;

    return $heredoc;
}
|
|
|
|
# Scan the body of a regexp (or the pattern part of a substitution),
# starting at the current pos($$rstr), until the closing delimiter that
# balances $ldel has been consumed.
#
# Arguments:
#   $c     - scanner context; $c->{stack} is read to find the innermost
#            bracket that is currently open around the regexp
#   $rstr  - reference to the source string; its pos() is advanced
#   $ldel  - opening delimiter of the regexp
#   $rdel  - closing delimiter of the regexp
#   $op    - operator being scanned; the callers in this file pass 's',
#            'm/qr', 'm' or ''.  A false value enables the check for the
#            enclosing bracket's closing delimiter near the end of the loop.
#
# Returns the text from the starting position up to the position reached
# when the nesting became empty.  Returns nothing, with $@ set by
# _match_error(), when delimiters are left unmatched or when the enclosing
# closing delimiter is met first; returns nothing without touching $@ when
# an embedded (?{ ... }) code block fails to scan.
sub _scan_re {
|
|
my ($self, $c, $rstr, $ldel, $rdel, $op) = @_;
|
|
my $startpos = pos($$rstr) || 0;
|
|
|
|
_debug(" L $ldel R $rdel") if DEBUG_RE;
|
|
|
|
# Innermost bracket currently open on the scanner stack, and its closing
# counterpart (tr only rewrites $outer_closing_delimiter).
my ($outer_opening_delimiter, $outer_closing_delimiter);
|
|
if (@{$c->{stack}}) {
|
|
($outer_closing_delimiter = $outer_opening_delimiter = $c->{stack}[-1][0]) =~ tr/[({</])}>/;
|
|
}
|
|
|
|
# Stack of opening delimiters/groups that still wait for their closer.
my @nesting = ($ldel);
|
|
# Set once a newline has been seen inside the regexp.
my $multiline = 0;
|
|
# Set when a '#' was seen that was not consumed as a whole-line comment;
# reset at every newline.
my $saw_sharp = 0;
|
|
# NOTE(review): $prev does not appear to be used anywhere in this sub.
my $prev;
|
|
my ($p, $c1);
|
|
while (defined($p = pos($$rstr))) {
|
|
$c1 = substr($$rstr, $p, 1);
|
|
# A newline (and the whitespace after it) marks the regexp as multi-line.
if ($c1 eq "\n") {
|
|
$$rstr =~ m{\G\n\s*}gcs;
|
|
$multiline = 1;
|
|
$saw_sharp = 0;
|
|
# _debug("CRLF") if DEBUG_RE;
|
|
next;
|
|
}
|
|
if ($c1 eq ' ' or $c1 eq "\t") {
|
|
$$rstr =~ m{\G\s*}gc;
|
|
# _debug("WHITESPACE") if DEBUG_RE;
|
|
next;
|
|
}
|
|
# In a multi-line regexp a '#' running to the end of the line without
# containing the closing delimiter is consumed as a comment; otherwise
# only the '#' is consumed and remembered in $saw_sharp.
if ($c1 eq '#' and $rdel ne '#') {
|
|
if ($multiline and $$rstr =~ m{\G(#[^\Q$rdel\E]*?)\n}gcs) {
|
|
_debug(" comment $1") if DEBUG_RE
|
|
} else {
|
|
pos($$rstr) = $p + 1;
|
|
$saw_sharp = 1;
|
|
_debug(" saw #") if DEBUG_RE;
|
|
}
|
|
next;
|
|
}
|
|
|
|
# A backslash escapes the next character (unless backslash is the delimiter).
if ($c1 eq '\\' and $rdel ne '\\') {
|
|
if ($$rstr =~ m/\G(\\.)/gcs) {
|
|
_debug(" escaped $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
}
|
|
|
|
_debug(" looking @nesting: $c1") if DEBUG_RE;
|
|
|
|
if ($c1 eq '[') {
|
|
# character class may have other (ignorable) delimiters
|
|
if ($$rstr =~ m/\G(\[\[:\w+?:\]\])/gcs) {
|
|
_debug(" character class $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
if ($$rstr =~ m/\G(\[[^\\\]]]*?(\\.[^\\\]]]*)*\])/gcs) {
|
|
_debug(" character class: $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
}
|
|
|
|
# Closing delimiter.  After a '#' on the same line it may belong to a
# comment: it is taken as the real end when the text that follows (after
# the replacement part, for 's') looks like modifiers containing 'x';
# otherwise, in a multi-line regexp, it is skipped as part of a comment.
if ($c1 eq $rdel) {
|
|
pos($$rstr) = $p + 1;
|
|
if ($saw_sharp) {
|
|
my $tmp_pos = $p + 1;
|
|
if ($op eq 's') {
|
|
_debug(" looking for latter part") if DEBUG_RE;
|
|
my $latter = $self->_scan_re2($c, $rstr, $ldel, $op);
|
|
if (!defined $latter) {
|
|
pos($$rstr) = $tmp_pos;
|
|
next;
|
|
}
|
|
_debug(" latter: $latter") if DEBUG_RE;
|
|
}
|
|
# no /g here: this only peeks at the current position without moving pos()
if ($$rstr =~ m/\G[a-wyz]*x/) {
|
|
# looks like an end of block
|
|
_debug(" end of block $rdel (after #)") if DEBUG_RE;
|
|
@nesting = ();
|
|
pos($$rstr) = $tmp_pos;
|
|
last;
|
|
}
|
|
pos($$rstr) = $tmp_pos;
|
|
if ($multiline) {
|
|
next; # part of a comment
|
|
}
|
|
}
|
|
_debug(" end of block $rdel") if DEBUG_RE;
|
|
# Unwind the nesting down to (and including) the matching opener.
my $expected = $rdel;
|
|
if ($ldel ne $rdel) {
|
|
$expected =~ tr/)}]>/({[</;
|
|
}
|
|
while(my $nested = pop @nesting) {
|
|
last if $nested eq $expected;
|
|
}
|
|
last unless @nesting;
|
|
next;
|
|
} elsif ($c1 eq $ldel) {
|
|
pos($$rstr) = $p + 1;
|
|
# after a '#' in a multi-line regexp the opener is not counted as nesting
if ($multiline and $saw_sharp) {
|
|
} else {
|
|
_debug(" block $ldel") if DEBUG_RE;
|
|
push @nesting, $ldel;
|
|
next;
|
|
}
|
|
}
|
|
|
|
if ($c1 eq '{') {
|
|
# quantifier shouldn't be nested
|
|
if ($$rstr =~ m/\G(\{[0-9]+(?:,(?:[0-9]+)?)?})/gcs) {
|
|
_debug(" quantifier $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
}
|
|
|
|
if ($c1 eq '(') {
|
|
my $c2 = substr($$rstr, $p + 1, 1);
|
|
if ($c2 eq '?' and !($multiline and $saw_sharp)) {
|
|
# code
|
|
if ($$rstr =~ m/\G((\()\?+?)(?=\{)/gc) {
|
|
_debug(" code $1") if DEBUG_RE;
|
|
push @nesting, $2;
|
|
# the embedded code block is handed to the main scanner
unless (eval { $self->_scan($c, $rstr, F_EXPECTS_BRACKET); 1 }) {
|
|
_debug("scan failed") if DEBUG_RE;
|
|
return;
|
|
}
|
|
next;
|
|
}
|
|
# comment
|
|
if ($$rstr =~ m{\G(\(\?\#[^\\\)]*(?:\\.[^\\\)]*)*\))}gcs) {
|
|
_debug(" comment $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
}
|
|
|
|
# grouping may have (ignorable) <>
|
|
if ($$rstr =~ m/\G((\()(?:<[!=]|<\w+?>|>)?)/gc) {
|
|
_debug(" group $1") if DEBUG_RE;
|
|
push @nesting, $2;
|
|
next;
|
|
}
|
|
}
|
|
|
|
# maybe variables (maybe not)
|
|
if ($c1 eq '$' and substr($$rstr, $p + 1, 1) eq '{') {
|
|
# keep a copy of the stack so that a failed scan can be undone
my @tmp_stack = @{$c->{stack}};
|
|
next if eval { $self->_scan($c, $rstr, F_EXPECTS_BRACKET); 1 };
|
|
pos($$rstr) = $p;
|
|
$c->{stack} = \@tmp_stack;
|
|
}
|
|
|
|
if ($c1 eq ')') {
|
|
if (@nesting and $nesting[-1] eq '(') {
|
|
_debug(" end of group $c1") if DEBUG_RE;
|
|
pop @nesting;
|
|
pos($$rstr) = $p + 1;
|
|
next;
|
|
} else {
|
|
# die "unnested @nesting" unless $saw_sharp;
|
|
}
|
|
}
|
|
|
|
# for //, see if an outer closing delimiter is found first (ie. see if it was actually a /)
|
|
if (!$op) {
|
|
if ($outer_opening_delimiter and $c1 eq $outer_opening_delimiter) {
|
|
push @nesting, $c1;
|
|
pos($$rstr) = $p + 1;
|
|
next;
|
|
}
|
|
|
|
if ($outer_closing_delimiter and $c1 eq $outer_closing_delimiter) {
|
|
if (@nesting and $nesting[-1] eq $outer_opening_delimiter) {
|
|
pop @nesting;
|
|
pos($$rstr) = $p + 1;
|
|
next;
|
|
}
|
|
|
|
return _match_error($rstr, "Outer closing delimiter: $outer_closing_delimiter is found");
|
|
}
|
|
}
|
|
|
|
# anything else: consume a run of word characters or a single character
if ($$rstr =~ m/\G(\w+|.)/gcs) {
|
|
_debug(" rest $1") if DEBUG_RE;
|
|
next;
|
|
}
|
|
last;
|
|
}
|
|
# leaving the loop with open delimiters means the regexp never ended
if ($#nesting>=0) {
|
|
return _match_error($rstr, "Unmatched opening bracket(s): ". join("..",@nesting)."..");
|
|
}
|
|
|
|
my $endpos = pos($$rstr);
|
|
|
|
return substr($$rstr, $startpos, $endpos - $startpos);
|
|
}
|
|
|
|
|
|
# Scan the second (replacement) part of a two-part quote-like operator,
# starting at the current pos($$rstr).
#
# $ldel is the opening delimiter of the FIRST part.  When it is a bracket,
# the second part has its own opening delimiter, which is read here after an
# optional comment and whitespace; otherwise the second part simply runs up
# to the next unescaped $ldel.
#
# Returns the text consumed from the starting position, or nothing on
# failure.  $@ is set by _match_error() when the second block is missing or
# its brackets do not balance.
sub _scan_re2 {
    my ($self, $c, $rstr, $ldel, $op) = @_;
    my $startpos = pos($$rstr);

    my $is_bracket = qr/[[(<{]/;

    if ($ldel =~ $is_bracket) {
        $$rstr =~ /\G(?:$re_comment)/gcs;

        $$rstr =~ /\G\s*(\S)/gc
            or return _match_error($rstr, "Missing second block for quotelike $op");
        $ldel = $1;
    }

    if ($ldel =~ $is_bracket) {
        # Bracketing delimiters may nest, so count the open brackets.
        my ($rdel, $re_skip) = _gen_rdel_and_re_skip($ldel);
        my $depth = 1;
        while (defined(my $here = pos($$rstr))) {
            my $char = substr($$rstr, $here, 1);
            if ($char eq '\\') {
                # an escaped character never opens or closes a bracket
                pos($$rstr) = $here + 2;
            }
            elsif ($char eq $ldel) {
                pos($$rstr) = $here + 1;
                $depth++;
            }
            elsif ($char eq $rdel) {
                pos($$rstr) = $here + 1;
                last unless --$depth;
            }
            else {
                last unless $$rstr =~ m{\G$re_skip}gc;
            }
        }
        if ($depth) {
            my @nest = ($ldel) x $depth;
            return _match_error($rstr, "nesting mismatch: @nest");
        }
    }
    else {
        my $re = _gen_re_str_in_delims_with_end_delim($ldel);
        $$rstr =~ /\G$re/gcs or return;
    }

    return substr($$rstr, $startpos, pos($$rstr) - $startpos);
}
|
|
|
|
# Handle the tokens of a "use" statement.
#
# Arguments:
#   $c      - scanner context that collects the requirements
#   $rstr   - reference to the source string being scanned
#   $tokens - array reference of [ token, description ] pairs, starting with
#             the 'use' token itself; the array is consumed in place
#
# A perl version ("use 5.010", "use v5.10") is recorded as a requirement on
# perl, "use v6" marks the source as Perl 6 and ends the scan, and a module
# name is recorded with its version when one follows it.  Module-specific
# callbacks registered in the context are then run.
sub _use {
    my ($c, $rstr, $tokens) = @_;
    _debug("USE TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;
    shift @$tokens; # discard 'use' itself

    # TODO: see if the token is WORD or not?
    my $name_token = shift @$tokens or return;
    my $name = $name_token->[0];
    return unless defined $name and !ref $name and $name ne '';

    # "use 5.xxx" / "use v5.xx": a requirement on perl itself
    if (index($name, '5') == 0 or index($name, 'v5') == 0) {
        $c->add(perl => $name);
        return;
    }

    # "use v6": not Perl 5 source, no point in scanning any further
    if (index($name, 'v6') == 0) {
        $c->{perl6} = 1;
        $c->{ended} = 1;
        return;
    }

    if ($c->enables_utf8($name)) {
        $c->add($name => 0);
        $c->{utf8} = 1;
        unless ($c->{decoded}) {
            # decode the source once and ask for the scan to be redone
            $c->{decoded} = 1;
            _debug("UTF8 IS ON") if DEBUG;
            utf8::decode($$rstr);
            pos($$rstr) = 0;
            $c->{ended} = $c->{redo} = 1;
        }
    }

    if (is_module_name($name)) {
        # the module name may be followed by a minimum version
        my $next_token = $tokens->[0];
        my $next_desc  = $next_token->[1];
        my $has_version = ($next_desc and ($next_desc eq 'NUMBER' or $next_desc eq 'VERSION_STRING'));
        if ($has_version) {
            $c->add($name => $next_token->[0]);
            shift @$tokens;
        }
        else {
            $c->add($name => 0);
        }

        # modules that introduce sub-like keywords
        if (exists $sub_keywords{$name}) {
            $c->register_sub_keywords(@{$sub_keywords{$name}});
            $c->prototype_re(qr{\G(\((?:[^\\\(\)]*(?:\\.[^\\\(\)]*)*)\))});
        }

        # source filters rewrite the source; keep the current position and
        # ask for a rescan when the filter says so
        if (exists $filter_modules{$name}) {
            my $saved_pos = pos($$rstr);
            my $redo = $filter_modules{$name}->($rstr);
            pos($$rstr) = $saved_pos;
            $c->{ended} = $c->{redo} = 1 if $redo;
        }
    }

    # Pick the callback to run: one registered for the module itself, or the
    # Moose / Moose::Role one for modules whose name looks Moose-like.
    my $callback_for;
    if ($c->has_callback_for(use => $name)) {
        $callback_for = $name;
    }
    elsif ($name =~ /\b(?:Mo[ou]se?X?|MooX?|Elk|Antlers|Role)\b/) {
        my $module = $name =~ /Role/ ? 'Moose::Role' : 'Moose';
        $callback_for = $module if $c->has_callback_for(use => $module);
    }
    if (defined $callback_for) {
        eval { $c->run_callback_for(use => $callback_for, $tokens) };
        warn "Callback Error: $@" if $@;
    }

    if (exists $unsupported_packages{$name}) {
        $c->{found_unsupported_package} = $name;
    }
}
|
|
|
|
# Processes the tokens of a "require" statement.
#
#   $c      - context object that collects the findings
#   $rstr   - reference to the source string (not used in this routine)
#   $tokens - array reference of tokens; the leading 'require' token and
#             the name token are removed
#
# A name given as a reference is unwrapped to its first element and is
# treated as a file path: paths ending in ".pl" are ignored, other paths
# are turned into a module name ("/" becomes "::", a trailing ".pm" is
# dropped). Perl versions and module names are recorded through
# add_conditional(); "require v6" marks the source as perl6 and ends the
# scan.
sub _require {
  my ($c, $rstr, $tokens) = @_;
  _debug("REQUIRE TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;

  shift @$tokens; # discard 'require' itself

  # TODO: see if the token is WORD or not?
  my $name_token = shift @$tokens or return;
  my $name = $name_token->[0];
  if (ref $name) {
    $name = $name->[0];

    # Check definedness before the pattern operations below, so that an
    # undefined value does not trigger "uninitialized" warnings.
    return if !defined $name;
    return if $name =~ /\.pl$/i;

    $name =~ s|/|::|g;
    $name =~ s|\.pm$||i;
  }
  return if !defined $name or $name eq '';

  my $c1 = substr($name, 0, 1);
  if ($c1 eq '5') {
    $c->add_conditional(perl => $name);
    return;
  }
  if ($c1 eq 'v') {
    my $c2 = substr($name, 1, 1);
    if ($c2 eq '5') {
      $c->add_conditional(perl => $name);
      return;
    }
    if ($c2 eq '6') {
      $c->{perl6} = 1;
      $c->{ended} = 1;
      return;
    }
  }
  if (is_module_name($name)) {
    $c->add_conditional($name => 0);
    return;
  }
}
|
|
|
|
# Processes the tokens of a "no" statement.
#
#   $c      - context object that collects the findings
#   $rstr   - reference to the source string (not used in this routine)
#   $tokens - array reference of tokens; the leading 'no' token, the name
#             token and a following version token (if any) are removed
#
# Perl versions and module names are recorded through add_no(), "no utf8"
# clears the context's utf8 flag, "no v6" marks the source as perl6 and
# ends the scan, and a registered "no" callback is run with the remaining
# tokens when there is one.
sub _no {
  my ($c, $rstr, $tokens) = @_;
  _debug("NO TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;

  shift @$tokens;  # drop the 'no' keyword itself

  # TODO: see if the token is WORD or not?
  my $name_token = shift @$tokens or return;
  my $name = $name_token->[0];
  return unless defined($name) && !ref($name) && $name ne '';

  my $prefix = substr($name, 0, 2);
  if ($prefix eq 'v5' or substr($prefix, 0, 1) eq '5') {
    $c->add_no(perl => $name);
    return;
  }
  if ($prefix eq 'v6') {
    $c->{perl6} = 1;
    $c->{ended} = 1;
    return;
  }

  $c->{utf8} = 0 if $name eq 'utf8';

  if (is_module_name($name)) {
    # An optional version may directly follow the module name.
    my $peeked = $tokens->[0];
    my $kind = $peeked->[1];
    my $has_version = $kind && ($kind eq 'NUMBER' || $kind eq 'VERSION_STRING');
    $c->add_no($name => $has_version ? $peeked->[0] : 0);
    shift @$tokens if $has_version;
  }

  if ($c->has_callback_for(no => $name)) {
    eval { $c->run_callback_for(no => $name, $tokens) };
    warn "Callback Error: $@" if $@;
    return;
  }
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=encoding utf-8
|
|
|
|
=head1 NAME
|
|
|
|
Perl::PrereqScanner::NotQuiteLite - a tool to scan your Perl code for its prerequisites
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Perl::PrereqScanner::NotQuiteLite;
|
|
my $scanner = Perl::PrereqScanner::NotQuiteLite->new(
|
|
parsers => [qw/:installed -UniversalVersion/],
|
|
suggests => 1,
|
|
perl_minimum_version => 1,
|
|
);
|
|
my $context = $scanner->scan_file('path/to/file');
|
|
my $requirements = $context->requires;
|
|
my $recommends = $context->recommends;
|
|
my $suggestions = $context->suggests; # requirements in evals
|
|
my $noes = $context->noes;
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Perl::PrereqScanner::NotQuiteLite is yet another prerequisites
|
|
scanner. It passes almost all the scanning tests for
|
|
L<Perl::PrereqScanner> and L<Module::ExtractUse> (i.e. except for
|
|
a few dubious ones), and runs slightly faster than PPI-based
|
|
Perl::PrereqScanner. However, it doesn't run as fast as
|
|
L<Perl::PrereqScanner::Lite> (which uses an XS lexer).
|
|
|
|
Perl::PrereqScanner::NotQuiteLite also recognizes C<eval>.
|
|
Prerequisites in C<eval> are not considered as requirements, but you
|
|
can collect them as suggestions.
|
|
|
|
Conditional requirements or requirements loaded in a block are
|
|
treated as recommends. Noed modules are stored separately (since 0.94).
|
|
You may or may not need to merge them into requires.
|
|
|
|
Perl::PrereqScanner::NotQuiteLite can also recognize some of
|
|
the new language features such as C<say>, subroutine signatures,
|
|
and postfix dereferences, to improve the minimum perl requirement
|
|
(since 0.9905).
|
|
|
|
=head1 METHODS
|
|
|
|
=head2 new
|
|
|
|
creates a scanner object. Options are:
|
|
|
|
=over 4
|
|
|
|
=item parsers
|
|
|
|
By default, Perl::PrereqScanner::NotQuiteLite only recognizes
|
|
modules loaded directly by C<use>, C<require>, C<no> statements,
|
|
plus modules loaded by a few common modules such as C<base>,
|
|
C<parent>, C<if> (that are in the Perl core), and by two keywords
|
|
exported by the L<Moose> family (C<extends> and C<with>).
|
|
|
|
If you need more, you can pass extra parser names to the scanner,
|
|
or C<:bundled>, which loads and registers all the parsers bundled
|
|
with this distribution. If you have your own parsers, you can
|
|
specify C<:installed> to load and register all the installed parsers.
|
|
|
|
You can also pass a project-specific parser (that lies outside the
|
|
C<Perl::PrereqScanner::NotQuiteLite::Parser> namespace) by
|
|
prepending C<+> to the name.
|
|
|
|
use Perl::PrereqScanner::NotQuiteLite;
|
|
my $scanner = Perl::PrereqScanner::NotQuiteLite->new(
|
|
parsers => [qw/+PrereqParser::For::MyProject/],
|
|
);
|
|
|
|
If you don't want to load a specific parser for some reason,
|
|
prepend C<-> to the parser name.
|
|
|
|
=item suggests
|
|
|
|
Perl::PrereqScanner::NotQuiteLite ignores C<use>-like statements in
|
|
C<eval> by default. If you set this option to true,
|
|
Perl::PrereqScanner::NotQuiteLite also parses statements in C<eval>,
|
|
and records requirements as suggestions.
|
|
|
|
=item recommends
|
|
|
|
Perl::PrereqScanner::NotQuiteLite ignores C<require>-like
|
|
statements in a block by default. If you set this option to true,
|
|
Perl::PrereqScanner::NotQuiteLite also records requirements in
|
|
a block as recommendations.
|
|
|
|
=item perl_minimum_version
|
|
|
|
If you set this option to true, Perl::PrereqScanner::NotQuiteLite
|
|
adds a specific version of perl as a requirement when it finds
|
|
some of the new perl language features.
|
|
|
|
=back
|
|
|
|
=head2 scan_file
|
|
|
|
takes a path to a file and returns a ::Context object.
|
|
|
|
=head2 scan_string
|
|
|
|
takes a string, scans it, and returns a ::Context object.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Perl::PrereqScanner>, L<Perl::PrereqScanner::Lite>, L<Module::ExtractUse>
|
|
|
|
L<Perl::PrereqScanner::NotQuiteLite::App> to scan a whole distribution.
|
|
|
|
L<scan-perl-prereqs-nqlite> is a command-line interface to the above.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Kenichi Ishigaki, E<lt>ishigaki@cpan.orgE<gt>
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
This software is copyright (c) 2015 by Kenichi Ishigaki.
|
|
|
|
This is free software; you can redistribute it and/or modify it under
|
|
the same terms as the Perl 5 programming language system itself.
|
|
|
|
=cut
|