480 lines
12 KiB
Perl
480 lines
12 KiB
Perl
package PPI::Token::Unknown;
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
PPI::Token::Unknown - Token of unknown or as-yet undetermined type
|
|
|
|
=head1 INHERITANCE
|
|
|
|
PPI::Token::Unknown
|
|
isa PPI::Token
|
|
isa PPI::Element
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Object of the type C<PPI::Token::Unknown> exist primarily inside the
|
|
tokenizer, where they are temporarily brought into existing for a very
|
|
short time to represent a token that could be one of a number of types.
|
|
|
|
Generally, they only exist for a character or two, after which they are
|
|
resolved and converted into the correct type. For an object of this type
|
|
to survive the parsing process is considered a major bug.
|
|
|
|
Please report any C<PPI::Token::Unknown> you encounter in a L<PPI::Document>
|
|
object as a bug.
|
|
|
|
=cut
|
|
|
|
use strict;
|
|
use PPI::Token ();
|
|
use PPI::Exception ();
|
|
use PPI::Singletons qw' %MAGIC $CURLY_SYMBOL ';
|
|
|
|
our $VERSION = '1.270'; # VERSION
|
|
|
|
our @ISA = "PPI::Token";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
# Tokenizer Methods
|
|
|
|
sub __TOKENIZER__on_char {
|
|
my ( $self, $t ) = @_; # Self and Tokenizer
|
|
my $c = $t->{token}->{content}; # Current token
|
|
my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character
|
|
|
|
# Now, we split on the different values of the current content
|
|
if ( $c eq '*' ) {
|
|
# Is it a number?
|
|
if ( $char =~ /\d/ ) {
|
|
# bitwise operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
if ( $char =~ /[\w:]/ ) {
|
|
# Symbol (unless the thing before it is a number
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
|
|
$t->{class} = $t->{token}->set_class( 'Symbol' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if ( $char eq '{' ) {
|
|
# Get rest of line
|
|
pos $t->{line} = $t->{line_cursor} + 1;
|
|
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
|
|
# control-character symbol (e.g. *{^_Foo})
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
# Postfix dereference: ->**
|
|
if ( $char eq '*' ) {
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if ( $char eq '*' || $char eq '=' ) {
|
|
# Power operator '**' or mult-assign '*='
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return 1;
|
|
}
|
|
|
|
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
|
|
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq '$' ) {
|
|
# Postfix dereference: ->$* ->$#*
|
|
if ( $char eq '*' || $char eq '#' ) {
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if ( $char =~ /[a-z_]/i ) {
|
|
# Symbol
|
|
$t->{class} = $t->{token}->set_class( 'Symbol' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $MAGIC{ $c . $char } ) {
|
|
# Magic variable
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $char eq '{' ) {
|
|
# Get rest of line
|
|
pos $t->{line} = $t->{line_cursor} + 1;
|
|
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
|
|
# control-character symbol (e.g. ${^MATCH})
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
# Must be a cast
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq '@' ) {
|
|
# Postfix dereference: ->@*
|
|
if ( $char eq '*' ) {
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if ( $char =~ /[\w:]/ ) {
|
|
# Symbol
|
|
$t->{class} = $t->{token}->set_class( 'Symbol' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $MAGIC{ $c . $char } ) {
|
|
# Magic variable
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $char eq '{' ) {
|
|
# Get rest of line
|
|
pos $t->{line} = $t->{line_cursor} + 1;
|
|
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
|
|
# control-character symbol (e.g. @{^_Foo})
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
# Must be a cast
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq '%' ) {
|
|
# Postfix dereference: ->%* ->%[...]
|
|
if ( $char eq '*' || $char eq '[' ) {
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
|
|
if ( $char eq '*' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return 1;
|
|
}
|
|
if ( $char eq '[' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
}
|
|
}
|
|
|
|
# Is it a number?
|
|
if ( $char =~ /\d/ ) {
|
|
# bitwise operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
# Is it a magic variable?
|
|
if ( $char eq '^' || $MAGIC{ $c . $char } ) {
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $char =~ /[\w:]/ ) {
|
|
# Symbol (unless the thing before it is a number
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
|
|
$t->{class} = $t->{token}->set_class( 'Symbol' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if ( $char eq '{' ) {
|
|
# Get rest of line
|
|
pos $t->{line} = $t->{line_cursor} + 1;
|
|
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
|
|
# control-character symbol (e.g. %{^_Foo})
|
|
$t->{class} = $t->{token}->set_class( 'Magic' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
|
|
|
|
# Probably the mod operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->{class}->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq '&' ) {
|
|
# Postfix dereference: ->&*
|
|
if ( $char eq '*' ) {
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
|
|
$t->{class} = $t->{token}->set_class( 'Cast' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
# Is it a number?
|
|
if ( $char =~ /\d/ ) {
|
|
# bitwise operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
if ( $char =~ /[\w:]/ ) {
|
|
# Symbol (unless the thing before it is a number
|
|
my ( $prev ) = $t->_previous_significant_tokens(1);
|
|
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
|
|
$t->{class} = $t->{token}->set_class( 'Symbol' );
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
|
|
|
|
# Probably the binary and operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->{class}->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq '-' ) {
|
|
if ( $char =~ /\d/o ) {
|
|
# Number
|
|
$t->{class} = $t->{token}->set_class( 'Number' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $char eq '.' ) {
|
|
# Number::Float
|
|
$t->{class} = $t->{token}->set_class( 'Number::Float' );
|
|
return 1;
|
|
}
|
|
|
|
if ( $char =~ /[a-zA-Z]/ ) {
|
|
$t->{class} = $t->{token}->set_class( 'DashedWord' );
|
|
return 1;
|
|
}
|
|
|
|
# The numeric negative operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->{class}->__TOKENIZER__on_char( $t );
|
|
|
|
|
|
|
|
} elsif ( $c eq ':' ) {
|
|
if ( $char eq ':' ) {
|
|
# ::foo style bareword
|
|
$t->{class} = $t->{token}->set_class( 'Word' );
|
|
return 1;
|
|
}
|
|
|
|
# Now, : acts very very differently in different contexts.
|
|
# Mainly, we need to find out if this is a subroutine attribute.
|
|
# We'll leave a hint in the token to indicate that, if it is.
|
|
if ( $self->__TOKENIZER__is_an_attribute( $t ) ) {
|
|
# This : is an attribute indicator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
$t->{token}->{_attribute} = 1;
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
# It MIGHT be a label, but it's probably the ?: trinary operator
|
|
$t->{class} = $t->{token}->set_class( 'Operator' );
|
|
return $t->{class}->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
# erm...
|
|
PPI::Exception->throw('Unknown value in PPI::Token::Unknown token');
|
|
}
|
|
|
|
sub _is_cast_or_op {
|
|
my ( $self, $char ) = @_;
|
|
return 1 if $char eq '$';
|
|
return 1 if $char eq '@';
|
|
return 1 if $char eq '%';
|
|
return 1 if $char eq '*';
|
|
return 1 if $char eq '{';
|
|
return;
|
|
}
|
|
|
|
sub _as_cast_or_op {
|
|
my ( $self, $t ) = @_;
|
|
my $class = _cast_or_op( $t );
|
|
$t->{class} = $t->{token}->set_class( $class );
|
|
return $t->_finalize_token->__TOKENIZER__on_char( $t );
|
|
}
|
|
|
|
sub _prev_significant_w_cursor {
|
|
my ( $tokens, $cursor, $extra_check ) = @_;
|
|
while ( $cursor >= 0 ) {
|
|
my $token = $tokens->[ $cursor-- ];
|
|
next if !$token->significant;
|
|
next if $extra_check and !$extra_check->($token);
|
|
return ( $token, $cursor );
|
|
}
|
|
return ( undef, $cursor );
|
|
}
|
|
|
|
# Operator/operand-sensitive, multiple or GLOB cast
|
|
sub _cast_or_op {
|
|
my ( $t ) = @_;
|
|
|
|
my $tokens = $t->{tokens};
|
|
my $cursor = scalar( @$tokens ) - 1;
|
|
my $token;
|
|
|
|
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
|
|
return 'Cast' if !$token; # token was first in the document
|
|
|
|
if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) {
|
|
|
|
# Scan the token stream backwards an arbitrarily long way,
|
|
# looking for the matching opening curly brace.
|
|
my $structure_depth = 1;
|
|
( $token, $cursor ) = _prev_significant_w_cursor(
|
|
$tokens, $cursor,
|
|
sub {
|
|
my ( $token ) = @_;
|
|
return if !$token->isa( 'PPI::Token::Structure' );
|
|
if ( $token eq '}' ) {
|
|
$structure_depth++;
|
|
return;
|
|
}
|
|
if ( $token eq '{' ) {
|
|
$structure_depth--;
|
|
return if $structure_depth;
|
|
}
|
|
return 1;
|
|
}
|
|
);
|
|
return 'Operator' if !$token; # no matching '{', probably an unbalanced '}'
|
|
|
|
# Scan past any whitespace
|
|
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
|
|
return 'Operator' if !$token; # Document began with what must be a hash constructor.
|
|
return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript
|
|
|
|
my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @;
|
|
return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript
|
|
|
|
my $content = $token->content;
|
|
my $produces_or_wants_value =
|
|
( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) );
|
|
return $produces_or_wants_value ? 'Operator' : 'Cast';
|
|
}
|
|
|
|
my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @;
|
|
return 'Cast'
|
|
if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content }
|
|
or $token->isa( 'PPI::Token::Cast' )
|
|
or $token->isa( 'PPI::Token::Operator' )
|
|
or $token->isa( 'PPI::Token::Label' );
|
|
|
|
return 'Operator' if !$token->isa( 'PPI::Token::Word' );
|
|
|
|
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
|
|
return 'Cast' if !$token || $token->content ne '->';
|
|
|
|
return 'Operator';
|
|
}
|
|
|
|
# Are we at a location where a ':' would indicate a subroutine attribute
|
|
sub __TOKENIZER__is_an_attribute {
|
|
my $t = $_[1]; # Tokenizer object
|
|
my @tokens = $t->_previous_significant_tokens(3);
|
|
my $p0 = $tokens[0];
|
|
return '' if not $p0;
|
|
|
|
# If we just had another attribute, we are also an attribute
|
|
return 1 if $p0->isa('PPI::Token::Attribute');
|
|
|
|
# If we just had a prototype, then we are an attribute
|
|
return 1 if $p0->isa('PPI::Token::Prototype');
|
|
|
|
# Other than that, we would need to have had a bareword
|
|
return '' unless $p0->isa('PPI::Token::Word');
|
|
|
|
# We could be an anonymous subroutine
|
|
if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' ) {
|
|
return 1;
|
|
}
|
|
|
|
# Or, we could be a named subroutine
|
|
my $p1 = $tokens[1];
|
|
my $p2 = $tokens[2];
|
|
if (
|
|
$p1
|
|
and
|
|
$p1->isa('PPI::Token::Word')
|
|
and
|
|
$p1->content eq 'sub'
|
|
and (
|
|
not $p2
|
|
or
|
|
$p2->isa('PPI::Token::Structure')
|
|
or (
|
|
$p2->isa('PPI::Token::Whitespace')
|
|
and
|
|
$p2->content eq ''
|
|
)
|
|
)
|
|
) {
|
|
return 1;
|
|
}
|
|
|
|
# We aren't an attribute
|
|
'';
|
|
}
|
|
|
|
1;
|
|
|
|
=pod
|
|
|
|
=head1 SUPPORT
|
|
|
|
See the L<support section|PPI/SUPPORT> in the main module.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright 2001 - 2011 Adam Kennedy.
|
|
|
|
This program is free software; you can redistribute
|
|
it and/or modify it under the same terms as Perl itself.
|
|
|
|
The full text of the license can be found in the
|
|
LICENSE file included with this module.
|
|
|
|
=cut
|