Initial Commit

This commit is contained in:
Riley Schneider
2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions

View File

@@ -0,0 +1,77 @@
package PPI::Token::ArrayIndex;
=pod
=head1 NAME
PPI::Token::ArrayIndex - Token getting the last index for an array
=head1 INHERITANCE
PPI::Token::ArrayIndex
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::ArrayIndex> token represents an attempt to get the
last index of an array, such as C<$#array>.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $t = $_[1];
# Suck in till the end of the arrayindex
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G([\w:']+)/gc ) {
$t->{token}->{content} .= $1;
$t->{line_cursor} += length $1;
}
# End of token
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,176 @@
package PPI::Token::Attribute;
=pod
=head1 NAME
PPI::Token::Attribute - A token for a subroutine attribute
=head1 INHERITANCE
PPI::Token::Attribute
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
In Perl, attributes are a relatively recent addition to the language.
Given the code C< sub foo : bar(something) {} >, the C<bar(something)>
part is the attribute.
A C<PPI::Token::Attribute> token represents the entire of the attribute,
as the braces and its contents are not parsed into the tree, and are
treated by Perl (and thus by us) as a single string.
=head1 METHODS
This class provides some additional methods beyond those provided by its
L<PPI::Token> and L<PPI::Element> parent classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::Attribute Methods
=pod
=head2 identifier
The C<identifier> attribute returns the identifier part of the attribute.
That is, for the attribute C<foo(bar)>, the C<identifier> method would
return C<"foo">.
=cut
sub identifier {
my $self = shift;
$self->{content} =~ /^(.+?)\(/ ? $1 : $self->{content};
}
=pod
=head2 parameters
The C<parameters> method returns the parameter string for the attribute.
That is, for the attribute C<foo(bar)>, the C<parameters> method would
return C<"bar">.
Returns the parameters as a string (including the null string C<''> for
the case of an attribute such as C<foo()>.)
Returns C<undef> if the attribute does not have parameters.
=cut
sub parameters {
my $self = shift;
$self->{content} =~ /\((.*)\)$/ ? $1 : undef;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Unless this is a '(', we are finished.
unless ( $char eq '(' ) {
# Finalise and recheck
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# This is a bar(...) style attribute.
# We are currently on the ( so scan in until the end.
# We finish on the character AFTER our end
my $string = $class->__TOKENIZER__scan_for_end( $t );
if ( ref $string ) {
# EOF
$t->{token}->{content} .= $$string;
$t->_finalize_token;
return 0;
}
# Found the end of the attribute
$t->{token}->{content} .= $string;
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Scan for a close braced, and take into account both escaping,
# and open close bracket pairs in the string. When complete, the
# method leaves the line cursor on the LAST character found.
sub __TOKENIZER__scan_for_end {
my $t = $_[1];
# Loop as long as we can get new lines
my $string = '';
my $depth = 0;
while ( exists $t->{line} ) {
# Get the search area
pos $t->{line} = $t->{line_cursor};
# Look for a match
unless ( $t->{line} =~ /\G((?:\\.|[^()])*?[()])/gc ) {
# Load in the next line and push to first character
$string .= substr( $t->{line}, $t->{line_cursor} );
$t->_fill_line(1) or return \$string;
$t->{line_cursor} = 0;
next;
}
# Add to the string
$string .= $1;
$t->{line_cursor} += length $1;
# Alter the depth and continue if we aren't at the end
$depth += ($1 =~ /\($/) ? 1 : -1 and next;
# Found the end
return $string;
}
# Returning the string as a reference indicates EOF
\$string;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,113 @@
package PPI::Token::BOM;
=pod
=head1 NAME
PPI::Token::BOM - Tokens representing Unicode byte order marks
=head1 INHERITANCE
PPI::Token::BOM
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
This is a special token in that it can only occur at the beginning of
documents. If a BOM byte mark occurs elsewhere in a file, it should
be treated as L<PPI::Token::Whitespace>. We recognize the byte order
marks identified at this URL:
L<http://www.unicode.org/faq/utf_bom.html#BOM>
UTF-32, big-endian 00 00 FE FF
UTF-32, little-endian FF FE 00 00
UTF-16, big-endian FE FF
UTF-16, little-endian FF FE
UTF-8 EF BB BF
Note that as of this writing, PPI only has support for UTF-8
(namely, in POD and strings) and no support for UTF-16 or UTF-32. We
support the BOMs of the latter two for completeness only.
The BOM is considered non-significant, like white space.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
sub significant() { '' }
#####################################################################
# Parsing Methods
my %bom_types = (
"\x00\x00\xfe\xff" => 'UTF-32',
"\xff\xfe\x00\x00" => 'UTF-32',
"\xfe\xff" => 'UTF-16',
"\xff\xfe" => 'UTF-16',
"\xef\xbb\xbf" => 'UTF-8',
);
sub __TOKENIZER__on_line_start {
my $t = $_[1];
$_ = $t->{line};
if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
\xff\xfe\x00\x00 | # UTF-32, little-endian
\xfe\xff | # UTF-16, big-endian
\xff\xfe | # UTF-16, little-endian
\xef\xbb\xbf) # UTF-8
/xs) {
my $bom = $1;
if ($bom_types{$bom} ne 'UTF-8') {
return $t->_error("$bom_types{$bom} is not supported");
}
$t->_new_token('BOM', $bom) or return undef;
$t->{line_cursor} += length $bom;
}
# Continue just as if there was no BOM
$t->{class} = 'PPI::Token::Whitespace';
return $t->{class}->__TOKENIZER__on_line_start($t);
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,87 @@
package PPI::Token::Cast;
=pod
=head1 NAME
PPI::Token::Cast - A prefix which forces a value into a different context
=head1 INHERITANCE
PPI::Token::Cast
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A "cast" in PPI terms is one of more characters used as a prefix which force
a value into a different class or context.
This includes referencing, dereferencing, and a few other minor cases.
For expressions such as C<@$foo> or C<@{ $foo{bar} }> the C<@> in both cases
represents a cast. In this case, an array dereference.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
our %POSTFIX = map { $_ => 1 } (
qw{
%* @* $*
},
'$#*' # throws warnings if it's inside a qw
);
#####################################################################
# Tokenizer Methods
# A cast is either % @ $ or $#
# and also postfix dereference are %* @* $* $#*
sub __TOKENIZER__on_char {
my $t = $_[1];
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Are we still an operator if we add the next character
my $content = $t->{token}->{content};
return 1 if $POSTFIX{ $content . $char };
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,146 @@
package PPI::Token::Comment;
=pod
=head1 NAME
PPI::Token::Comment - A comment in Perl source code
=head1 INHERITANCE
PPI::Token::Comment
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
# This is a PPI::Token::Comment
print "Hello World!"; # So it this
$string =~ s/ foo # This, unfortunately, is not :(
bar
/w;
=head1 DESCRIPTION
In PPI, comments are represented by C<PPI::Token::Comment> objects.
These come in two flavours, line comment and inline comments.
A C<line comment> is a comment that stands on its own line. These comments
hold their own newline and whitespace (both leading and trailing) as part
of the one C<PPI::Token::Comment> object.
An inline comment is a comment that appears after some code, and
continues to the end of the line. This does B<not> include whitespace,
and the terminating newlines is considered a separate
L<PPI::Token::Whitespace> token.
This is largely a convenience, simplifying a lot of normal code relating
to the common things people do with comments.
Most commonly, it means when you C<prune> or C<delete> a comment, a line
comment disappears taking the entire line with it, and an inline comment
is removed from the inside of the line, allowing the newline to drop
back onto the end of the code, as you would expect.
It also means you can move comments around in blocks much more easily.
For now, this is a suitably handy way to do things. However, I do reserve
the right to change my mind on this one if it gets dangerously
anachronistic somewhere down the line.
=head1 METHODS
Only very limited methods are available, beyond those provided by our
parent L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
### XS -> PPI/XS.xs:_PPI_Token_Comment__significant 0.900+
sub significant() { '' }
# Most stuff goes through __TOKENIZER__commit.
# This is such a rare case, do char at a time to keep the code small
sub __TOKENIZER__on_char {
my $t = $_[1];
# Make sure not to include the trailing newline
if ( substr( $t->{line}, $t->{line_cursor}, 1 ) eq "\n" ) {
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
}
sub __TOKENIZER__commit {
my $t = $_[1];
# Get the rest of the line
my $rest = substr( $t->{line}, $t->{line_cursor} );
if ( chomp $rest ) { # Include the newline separately
# Add the current token, and the newline
$t->_new_token('Comment', $rest);
$t->_new_token('Whitespace', "\n");
} else {
# Add this token only
$t->_new_token('Comment', $rest);
}
# Advance the line cursor to the end
$t->{line_cursor} = $t->{line_length} - 1;
0;
}
# Comments end at the end of the line
sub __TOKENIZER__on_line_end {
$_[1]->_finalize_token if $_[1]->{token};
1;
}
=pod
=head2 line
The C<line> accessor returns true if the C<PPI::Token::Comment> is a
line comment, or false if it is an inline comment.
=cut
sub line {
# Entire line comments have a newline at the end
$_[0]->{content} =~ /\n$/ ? 1 : 0;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,96 @@
package PPI::Token::DashedWord;
=pod
=head1 NAME
PPI::Token::DashedWord - A dashed bareword token
=head1 INHERITANCE
PPI::Token::DashedWord
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The "dashed bareword" token represents literal values like C<-foo>.
NOTE: this class is currently unused. All tokens that should be
PPI::Token::DashedWords are just normal PPI::Token::Word instead.
That actually makes sense, since there really is nothing special about
this class except that dashed words cannot be subroutine names or
keywords. As such, this class may be removed from PPI in the future.
=head1 METHODS
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
=pod
=head2 literal
Returns the value of the dashed word as a string. This differs from
C<content> because C<-Foo'Bar> expands to C<-Foo::Bar>.
=cut
*literal = *PPI::Token::Word::literal;
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $t = $_[1];
# Suck to the end of the dashed bareword
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G(\w+)/gc ) {
$t->{token}->{content} .= $1;
$t->{line_cursor} += length $1;
}
# Are we a file test operator?
if ( $t->{token}->{content} =~ /^\-[rwxoRWXOezsfdlpSbctugkTBMAC]$/ ) {
# File test operator
$t->{class} = $t->{token}->set_class( 'Operator' );
} else {
# No, normal dashed bareword
$t->{class} = $t->{token}->set_class( 'Word' );
}
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,102 @@
package PPI::Token::Data;
=pod
=head1 NAME
PPI::Token::Data - The actual data in the __DATA__ section of a file
=head1 INHERITANCE
PPI::Token::Data
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Data> class is used to represent the actual data inside
a file's C<__DATA__> section.
One C<PPI::Token::Data> object is used to represent the entire of the data,
primarily so that it can provide a convenient handle directly to the data.
=head1 METHODS
C<PPI::Token::Data> provides one method in addition to those provided by
our parent L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use IO::String 1.07 ();
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# Methods
=pod
=head2 handle
The C<handle> method returns a L<IO::String> handle that allows you
to do all the normal handle-y things to the contents of the __DATA__
section of the file.
Unlike in perl itself, this means you can also do things like C<print>
new data onto the end of the __DATA__ section, or modify it with
any other process that can accept an L<IO::Handle> as input or output.
Returns an L<IO::String> object.
=cut
sub handle {
my $self = shift;
IO::String->new( \$self->{content} );
}
sub __TOKENIZER__on_line_start {
my ( $self, $t ) = @_;
# Add the line
if ( defined $t->{token} ) {
$t->{token}->{content} .= $t->{line};
}
else {
defined( $t->{token} = $t->{class}->new( $t->{line} ) ) or return undef;
}
return 0;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,111 @@
package PPI::Token::End;
=pod
=head1 NAME
PPI::Token::End - Completely useless content after the __END__ tag
=head1 INHERITANCE
PPI::Token::End
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
If you've read L<PPI::Token::Whitespace>, you should understand by now
the concept of documents "floating in a sea of PPI::Token::Whitespace".
Well it doesn't after the __END__ tag.
Once you __END__, it's all over. Anything after that tag isn't even fit
to be called whitespace. It just simply doesn't exist as far as perl
(the interpreter) is concerned.
That's not to say there isn't useful content. Most often people use
the __END__ tag to hide POD content, so that perl never has to see it,
and presumably providing some small speed up.
That's fine. PPI likes POD. Any POD after the __END__ tag is parsed
into valid L<PPI::Token::Pod> tags as normal. B<This> class, on the
other hand, is for "what's after __END__ when it isn't POD".
Basically, the completely worthless bits of the file :)
=head1 METHODS
This class has no method beyond what is provided by its L<PPI::Token> and
L<PPI::Element> parent classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# Tokenizer Methods
### XS -> PPI/XS.xs:_PPI_Token_End__significant 0.900+
sub significant() { '' }
sub __TOKENIZER__on_char() { 1 }
sub __TOKENIZER__on_line_start {
my $t = $_[1];
# Can we classify the entire line in one go
if ( $t->{line} =~ /^=(\w+)/ ) {
# A Pod tag... change to pod mode
$t->_new_token( 'Pod', $t->{line} );
unless ( $1 eq 'cut' ) {
# Normal start to pod
$t->{class} = 'PPI::Token::Pod';
}
# This is an error, but one we'll ignore
# Don't go into Pod mode, since =cut normally
# signals the end of Pod mode
} else {
if ( defined $t->{token} ) {
# Add to existing token
$t->{token}->{content} .= $t->{line};
} else {
$t->_new_token( 'End', $t->{line} );
}
}
0;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,323 @@
package PPI::Token::HereDoc;
=pod
=head1 NAME
PPI::Token::HereDoc - Token class for the here-doc
=head1 INHERITANCE
PPI::Token::HereDoc
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
Here-docs are incredibly handy when writing Perl, but incredibly tricky
when parsing it, primarily because they don't follow the general flow of
input.
They jump ahead and nab lines directly off the input buffer. Whitespace
and newlines may not matter in most Perl code, but they matter in here-docs.
They are also tricky to store as an object. They look sort of like an
operator and a string, but they don't act like it. And they have a second
section that should be something like a separate token, but isn't because a
string can span from above the here-doc content to below it.
So when parsing, this is what we do.
Firstly, the PPI::Token::HereDoc object, does not represent the C<<< << >>>
operator, or the "END_FLAG", or the content, or even the terminator.
It represents all of them at once.
The token itself has only the declaration part as its "content".
# This is what the content of a HereDoc token is
<<FOO
# Or this
<<"FOO"
# Or even this
<< 'FOO'
That is, the "operator", any whitespace separator, and the quoted or bare
terminator. So when you call the C<content> method on a HereDoc token, you
get '<< "FOO"'.
As for the content and the terminator, when treated purely in "content" terms
they do not exist.
The content is made available with the C<heredoc> method, and the name of
the terminator with the C<terminator> method.
To make things work in the way you expect, PPI has to play some games
when doing line/column location calculation for tokens, and also during
the content parsing and generation processes.
Documents cannot simply by recreated by stitching together the token
contents, and involve a somewhat more expensive procedure, but the extra
expense should be relatively negligible unless you are doing huge
quantities of them.
Please note that due to the immature nature of PPI in general, we expect
C<HereDocs> to be a rich (bad) source of corner-case bugs for quite a while,
but for the most part they should more or less DWYM.
=head2 Comparison to other string types
Although technically it can be considered a quote, for the time being
C<HereDocs> are being treated as a completely separate C<Token> subclass,
and will not be found in a search for L<PPI::Token::Quote> or
L<PPI::Token::QuoteLike> objects.
This may change in the future, with it most likely to end up under
QuoteLike.
=head1 METHODS
Although it has the standard set of C<Token> methods, C<HereDoc> objects
have a relatively large number of unique methods all of their own.
=cut
use strict;
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::HereDoc Methods
=pod
=head2 heredoc
The C<heredoc> method is the authoritative method for accessing the contents
of the C<HereDoc> object.
It returns the contents of the here-doc as a list of newline-terminated
strings. If called in scalar context, it returns the number of lines in
the here-doc, B<excluding> the terminator line.
=cut
sub heredoc { @{shift->{_heredoc}} }
=pod
=head2 terminator
The C<terminator> method returns the name of the terminating string for the
here-doc.
Returns the terminating string as an unescaped string (in the rare case
the terminator has an escaped quote in it).
=cut
sub terminator {
shift->{_terminator};
}
sub _is_terminator {
my ( $self, $terminator, $line, $indented ) = @_;
if ( $indented ) {
return $line =~ /^\s*\Q$terminator\E$/;
} else {
return $line eq $terminator;
}
}
sub _indent {
my ( $self, $token ) = @_;
my ($indent) = $token->{_terminator_line} =~ /^(\s*)/;
return $indent;
}
sub _is_match_indent {
my ( $self, $token, $indent ) = @_;
return (grep { /^$indent/ } @{$token->{_heredoc}}) == @{$token->{_heredoc}};
}
#####################################################################
# Tokenizer Methods
# Parse in the entire here-doc in one call
sub __TOKENIZER__on_char {
my ( $self, $t ) = @_;
# We are currently located on the first char after the <<
# Handle the most common form first for simplicity and speed reasons
### FIXME - This regex, and this method in general, do not yet allow
### for the null here-doc, which terminates at the first
### empty line.
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} !~ m/\G( ~? \s* (?: "[^"]*" | '[^']*' | `[^`]*` | \\?\w+ ) )/gcx ) {
# Degenerate to a left-shift operation
$t->{token}->set_class('Operator');
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Add the rest of the token, work out what type it is,
# and suck in the content until the end.
my $token = $t->{token};
$token->{content} .= $1;
$t->{line_cursor} += length $1;
# Find the terminator, clean it up and determine
# the type of here-doc we are dealing with.
my $content = $token->{content};
if ( $content =~ /^\<\<(~?)(\w+)$/ ) {
# Bareword
$token->{_mode} = 'interpolate';
$token->{_indented} = 1 if $1 eq '~';
$token->{_terminator} = $2;
} elsif ( $content =~ /^\<\<(~?)\s*\'(.*)\'$/ ) {
# ''-quoted literal
$token->{_mode} = 'literal';
$token->{_indented} = 1 if $1 eq '~';
$token->{_terminator} = $2;
$token->{_terminator} =~ s/\\'/'/g;
} elsif ( $content =~ /^\<\<(~?)\s*\"(.*)\"$/ ) {
# ""-quoted literal
$token->{_mode} = 'interpolate';
$token->{_indented} = 1 if $1 eq '~';
$token->{_terminator} = $2;
$token->{_terminator} =~ s/\\"/"/g;
} elsif ( $content =~ /^\<\<(~?)\s*\`(.*)\`$/ ) {
# ``-quoted command
$token->{_mode} = 'command';
$token->{_indented} = 1 if $1 eq '~';
$token->{_terminator} = $2;
$token->{_terminator} =~ s/\\`/`/g;
} elsif ( $content =~ /^\<\<(~?)\\(\w+)$/ ) {
# Legacy forward-slashed bareword
$token->{_mode} = 'literal';
$token->{_indented} = 1 if $1 eq '~';
$token->{_terminator} = $2;
} else {
# WTF?
return undef;
}
# Suck in the HEREDOC
$token->{_heredoc} = \my @heredoc;
my $terminator = $token->{_terminator} . "\n";
while ( defined( my $line = $t->_get_line ) ) {
if ( $self->_is_terminator( $terminator, $line, $token->{_indented} ) ) {
# Keep the actual termination line for consistency
# when we are re-assembling the file
$token->{_terminator_line} = $line;
if ( $token->{_indented} ) {
my $indent = $self->_indent( $token );
# Indentation of here-doc doesn't match delimiter
unless ( $self->_is_match_indent( $token, $indent ) ) {
push @heredoc, $line;
last;
}
s/^$indent// for @heredoc, $token->{_terminator_line};
}
# The HereDoc is now fully parsed
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Add the line
push @heredoc, $line;
}
# End of file.
# Error: Didn't reach end of here-doc before end of file.
# If the here-doc block is not empty, look at the last line to determine if
# the here-doc terminator is missing a newline (which Perl would fail to
# compile but is easy to detect) or if the here-doc block was just not
# terminated at all (which Perl would fail to compile as well).
$token->{_terminator_line} = undef;
if ( @heredoc and defined $heredoc[-1] ) {
# See PPI::Tokenizer, the algorithm there adds a space at the end of the
# document that we need to make sure we remove.
if ( $t->{source_eof_chop} ) {
chop $heredoc[-1];
$t->{source_eof_chop} = '';
}
# Check if the last line of the file matches the terminator without
# newline at the end. If so, remove it from the content and set it as
# the terminator line.
$token->{_terminator_line} = pop @heredoc
if $self->_is_terminator( $token->{_terminator}, $heredoc[-1], $token->{_indented} );
}
if ( $token->{_indented} && $token->{_terminator_line} ) {
my $indent = $self->_indent( $token );
if ( $self->_is_match_indent( $token, $indent ) ) {
# Remove indent from here-doc as much as possible
s/^$indent// for @heredoc;
}
s/^$indent// for $token->{_terminator_line};
}
# Set a hint for PPI::Document->serialize so it can
# inexpensively repair it if needed when writing back out.
$token->{_damaged} = 1;
# The HereDoc is not fully parsed
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 TO DO
- Implement PPI::Token::Quote interface compatibility
- Check CPAN for any use of the null here-doc or here-doc-in-s///e
- Add support for the null here-doc
- Add support for here-doc in s///e
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,58 @@
package PPI::Token::Label;
=pod
=head1 NAME
PPI::Token::Label - Token class for a statement label
=head1 INHERITANCE
PPI::Token::Label
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A label is an identifier attached to a line or statements, to allow for
various types of flow control. For example, a loop might have a label
attached so that a C<last> or C<next> flow control statement can be used
from multiple levels below to reference the loop directly.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,200 @@
package PPI::Token::Magic;
=pod
=head1 NAME
PPI::Token::Magic - Tokens representing magic variables
=head1 INHERITANCE
PPI::Token::Magic
isa PPI::Token::Symbol
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
# When we say magic variables, we mean these...
$1 $2 $3 $4 $5 $6 $7 $8 $9
$_ $& $` $' $+ @+ %+ $* $. $/ $|
$\ $" $; $% $= $- @- %- $) $#
$~ $^ $: $? $! %! $@ $$ $< $>
$( $0 $[ $] @_ @* $} $, $#+ $#-
$^L $^A $^E $^C $^D $^F $^H
$^I $^M $^N $^O $^P $^R $^S
$^T $^V $^W $^X %^H
=head1 DESCRIPTION
C<PPI::Token::Magic> is a sub-class of L<PPI::Token::Symbol> which
identifies the token as "magic variable", one of the strange and
unusual variables that are connected to "things" behind the scenes.
Some are extremely common, like C<$_>, and others you will quite
probably never encounter in your Perl career.
=head1 METHODS
The class provides no additional methods, beyond those provided by
L<PPI::Token::Symbol>, L<PPI::Token> and L<PPI::Element>.
=cut
use strict;
use PPI::Token::Symbol ();
use PPI::Token::Unknown ();
use PPI::Singletons qw' %MAGIC $CURLY_SYMBOL ';
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Symbol";
sub __TOKENIZER__on_char {
my $t = $_[1];
# $c is the candidate new content
my $c = $t->{token}->{content} . substr( $t->{line}, $t->{line_cursor}, 1 );
# Do a quick first test so we don't have to do more than this one.
# All of the tests below match this one, so it should provide a
# small speed up. This regex should be updated to match the inside
# tests if they are changed.
if ( $c =~ /^ \$ .* [ \w : \$ \{ ] $/x ) {
if ( $c =~ /^(\$(?:\_[\w:]|::))/ or $c =~ /^\$\'[\w]/ ) {
# If and only if we have $'\d, it is not a
# symbol. (this was apparently a conscious choice)
# Note that $::0 on the other hand is legal
if ( $c =~ /^\$\'\d$/ ) {
# In this case, we have a magic plus a digit.
# Save the CURRENT token, and rerun the on_char
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# A symbol in the style $_foo or $::foo or $'foo.
# Overwrite the current token
$t->{class} = $t->{token}->set_class('Symbol');
return PPI::Token::Symbol->__TOKENIZER__on_char( $t );
}
if ( $c =~ /^\$\$\w/ ) {
# This is really a scalar dereference. ( $$foo )
# Add the current token as the cast...
$t->{token} = PPI::Token::Cast->new( '$' );
$t->_finalize_token;
# ... and create a new token for the symbol
return $t->_new_token( 'Symbol', '$' );
}
if ( $c eq '$${' ) {
# This _might_ be a dereference of one of the
# control-character symbols.
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# This is really a dereference. ( $${^_foo} )
# Add the current token as the cast...
$t->{token} = PPI::Token::Cast->new( '$' );
$t->_finalize_token;
# ... and create a new token for the symbol
return $t->_new_token( 'Magic', '$' );
}
}
if ( $c eq '$#$' or $c eq '$#{' ) {
# This is really an index dereferencing cast, although
# it has the same two chars as the magic variable $#.
$t->{class} = $t->{token}->set_class('Cast');
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
if ( $c =~ /^(\$\#)\w/ ) {
# This is really an array index thingy ( $#array )
$t->{token} = PPI::Token::ArrayIndex->new( "$1" );
return PPI::Token::ArrayIndex->__TOKENIZER__on_char( $t );
}
if ( $c =~ /^\$\^\w+$/o ) {
# It's an escaped char magic... maybe ( like $^M )
my $next = substr( $t->{line}, $t->{line_cursor}+1, 1 ); # Peek ahead
if ($MAGIC{$c} && (!$next || $next !~ /\w/)) {
$t->{token}->{content} = $c;
$t->{line_cursor}++;
} else {
# Maybe it's a long magic variable like $^WIDE_SYSTEM_CALLS
return 1;
}
}
if ( $c =~ /^\$\#\{/ ) {
# The $# is actually a cast, and { is its block
# Add the current token as the cast...
$t->{token} = PPI::Token::Cast->new( '$#' );
$t->_finalize_token;
# ... and create a new token for the block
return $t->_new_token( 'Structure', '{' );
}
} elsif ($c =~ /^%\^/) {
return 1 if $c eq '%^';
# It's an escaped char magic... maybe ( like %^H )
if ($MAGIC{$c}) {
$t->{token}->{content} = $c;
$t->{line_cursor}++;
} else {
# Back off, treat '%' as an operator
chop $t->{token}->{content};
bless $t->{token}, $t->{class} = 'PPI::Token::Operator';
$t->{line_cursor}--;
}
}
if ( $MAGIC{$c} ) {
# $#+ and $#-
$t->{line_cursor} += length( $c ) - length( $t->{token}->{content} );
$t->{token}->{content} = $c;
} else {
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/($CURLY_SYMBOL)/gc ) {
# control character symbol (e.g. ${^MATCH})
$t->{token}->{content} .= $1;
$t->{line_cursor} += length $1;
} elsif ( $c =~ /^\$\d+$/ and $t->{line} =~ /\G(\d+)/gc ) {
# Grab trailing digits of regex capture variables.
$t->{token}{content} .= $1;
$t->{line_cursor} += length $1;
}
}
# End the current magic token, and recheck
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Our version of canonical is plain simple
sub canonical { $_[0]->content }
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,163 @@
package PPI::Token::Number;
=pod
=head1 NAME
PPI::Token::Number - Token class for a number
=head1 SYNOPSIS
$n = 1234; # decimal integer
$n = 0b1110011; # binary integer
$n = 01234; # octal integer
$n = 0x1234; # hexadecimal integer
$n = 12.34e-56; # exponential notation ( currently not working )
=head1 INHERITANCE
PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number> class is used for tokens that represent numbers,
in the various types that Perl supports.
=head1 METHODS
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
=pod
=head2 base
The C<base> method is provided by all of the ::Number subclasses.
This is 10 for decimal, 16 for hexadecimal, 2 for binary, etc.
=cut
sub base() { 10 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
return 0 + $_[0]->_literal;
}
sub _literal {
# De-sugar the string representation
my $self = shift;
my $string = $self->content;
$string =~ s/^\+//;
$string =~ s/_//g;
return $string;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow underscores straight through
return 1 if $char eq '_';
# Handle the conversion from an unknown to known type.
# The regex covers "potential" hex/bin/octal number.
my $token = $t->{token};
if ( $token->{content} =~ /^-?0_*$/ ) {
# This could be special
if ( $char eq 'x' || $char eq 'X' ) {
$t->{class} = $t->{token}->set_class( 'Number::Hex' );
return 1;
} elsif ( $char eq 'b' || $char eq 'B' ) {
$t->{class} = $t->{token}->set_class( 'Number::Binary' );
return 1;
} elsif ( $char =~ /\d/ ) {
# You cannot have 8s and 9s on octals
if ( $char eq '8' or $char eq '9' ) {
$token->{_error} = "Illegal character in octal number '$char'";
}
$t->{class} = $t->{token}->set_class( 'Number::Octal' );
return 1;
}
}
# Handle the easy case, integer or real.
return 1 if $char =~ /\d/o;
if ( $char eq '.' ) {
$t->{class} = $t->{token}->set_class( 'Number::Float' );
return 1;
}
if ( $char eq 'e' || $char eq 'E' ) {
$t->{class} = $t->{token}->set_class( 'Number::Exp' );
return 1;
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 CAVEATS
Compared to Perl, the number tokenizer is too liberal about allowing
underscores anywhere. For example, the following is a syntax error in
Perl, but is allowed in PPI:
0_b10
=head1 TO DO
- Treat v-strings as binary strings or barewords, not as "base-256"
numbers
- Break out decimal integers into their own subclass?
- Implement literal()
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,117 @@
package PPI::Token::Number::Binary;
=pod
=head1 NAME
PPI::Token::Number::Binary - Token class for a binary number
=head1 SYNOPSIS
$n = 0b1110011; # binary integer
=head1 INHERITANCE
PPI::Token::Number::Binary
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Binary> class is used for tokens that
represent base-2 numbers.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number";
=pod
=head2 base
Returns the base for the number: 2.
=cut
sub base() { 2 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
return if $self->{_error};
my $str = $self->_literal;
my $neg = $str =~ s/^\-//;
$str =~ s/^0[bB]//;
my $val = 0;
for my $bit ( $str =~ m/(.)/g ) {
$val = $val * 2 + $bit;
}
return $neg ? -$val : $val;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow underscores straight through
return 1 if $char eq '_';
if ( $char =~ /[\w\d]/ ) {
unless ( $char eq '1' or $char eq '0' ) {
# Add a warning if it contains non-binary chars
$t->{token}->{_error} = "Illegal character in binary number '$char'";
}
return 1;
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,146 @@
package PPI::Token::Number::Exp;
=pod
=head1 NAME
PPI::Token::Number::Exp - Token class for an exponential notation number
=head1 SYNOPSIS
$n = 1.0e-2;
$n = 1e+2;
=head1 INHERITANCE
PPI::Token::Number::Exp
isa PPI::Token::Number::Float
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Exp> class is used for tokens that
represent floating point numbers with exponential notation.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number::Float ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number::Float";
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
return if $self->{_error};
my ($mantissa, $exponent) = split m/e/i, $self->_literal;
my $neg = $mantissa =~ s/^\-//;
$mantissa =~ s/^\./0./;
$exponent =~ s/^\+//;
# Must cast exponent as numeric type, due to string type '00' exponent
# creating false positive condition in for() loop below, causing infinite loop
$exponent += 0;
# This algorithm is reasonably close to the S_mulexp10()
# algorithm from the Perl source code, so it should arrive
# at the same answer as Perl most of the time.
my $negpow = 0;
if ($exponent < 0) {
$negpow = 1;
$exponent *= -1;
}
my $result = 1;
my $power = 10;
for (my $bit = 1; $exponent; $bit = $bit << 1) {
if ($exponent & $bit) {
$exponent = $exponent ^ $bit;
$result *= $power;
}
$power *= $power;
}
my $val = $neg ? 0 - $mantissa : $mantissa;
return $negpow ? $val / $result : $val * $result;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# To get here, the token must have already encountered an 'E'
# Allow underscores straight through
return 1 if $char eq '_';
# Allow digits
return 1 if $char =~ /\d/o;
# Start of exponent is special
if ( $t->{token}->{content} =~ /e$/i ) {
# Allow leading +/- in exponent
return 1 if $char eq '-' || $char eq '+';
# Invalid character in exponent. Recover
if ( $t->{token}->{content} =~ s/\.(e)$//i ) {
my $word = $1;
$t->{class} = $t->{token}->set_class('Number');
$t->_new_token('Operator', '.');
$t->_new_token('Word', $word);
return $t->{class}->__TOKENIZER__on_char( $t );
}
else {
$t->{token}->{_error} = "Illegal character in exponent '$char'";
}
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,140 @@
package PPI::Token::Number::Float;
=pod
=head1 NAME
PPI::Token::Number::Float - Token class for a floating-point number
=head1 SYNOPSIS
$n = 1.234;
=head1 INHERITANCE
PPI::Token::Number::Float
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Float> class is used for tokens that
represent floating point numbers. A float is identified by n decimal
point. Exponential notation (the C<e> or C<E>) is handled by the
PPI::Token::Number::Exp class.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number";
=pod
=head2 base
Returns the base for the number: 10.
=cut
sub base() { 10 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
my $str = $self->_literal;
my $neg = $str =~ s/^\-//;
$str =~ s/^\./0./;
my $val = 0+$str;
return $neg ? -$val : $val;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow underscores straight through
return 1 if $char eq '_';
# Allow digits
return 1 if $char =~ /\d/o;
if ( $char eq '.' ) { # A second decimal point? That gets complicated.
if ( $t->{token}{content} =~ /\.$/ ) {
# We have a .., which is an operator. Take the . off the end of the
# token and finish it, then make the .. operator.
chop $t->{token}{content};
$t->{class} = $t->{token}->set_class( 'Number' );
$t->_new_token('Operator', '..');
return 0;
} elsif ( $t->{token}{content} =~ /\._/ ) {
($t->{token}{content}, my $bareword)
= split /\./, $t->{token}{content};
$t->{class} = $t->{token}->set_class( 'Number' );
$t->_new_token('Operator', '.');
$t->_new_token('Word', $bareword);
$t->_new_token('Operator', '.');
return 0;
} else {
$t->{class} = $t->{token}->set_class( 'Number::Version' );
return 1;
}
}
# perl seems to regard pretty much anything that's not strictly an exp num
# as float + stuff
my $char2 = substr $t->{line}, $t->{line_cursor}+1, 1;
if ("$char$char2" =~ /[eE][0-9+-]/) {
$t->{class} = $t->{token}->set_class( 'Number::Exp' );
return 1;
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,108 @@
package PPI::Token::Number::Hex;
=pod
=head1 NAME
PPI::Token::Number::Hex - Token class for a binary number
=head1 SYNOPSIS
$n = 0x1234; # hexadecimal integer
=head1 INHERITANCE
PPI::Token::Number::Hex
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Hex> class is used for tokens that
represent base-16 numbers.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number";
=pod
=head2 base
Returns the base for the number: 16.
=cut
sub base() { 16 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
my $str = $self->_literal;
my $neg = $str =~ s/^\-//;
my $val = hex lc( $str ); # lc for compatibility with perls before 5.14
return $neg ? -$val : $val;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow underscores straight through
return 1 if $char eq '_';
if ( $char =~ /[[:xdigit:]]/ ) {
return 1;
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,113 @@
package PPI::Token::Number::Octal;
=pod
=head1 NAME
PPI::Token::Number::Octal - Token class for a binary number
=head1 SYNOPSIS
$n = 0777; # octal integer
=head1 INHERITANCE
PPI::Token::Number::Octal
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Octal> class is used for tokens that
represent base-8 numbers.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number";
=pod
=head2 base
Returns the base for the number: 8.
=cut
sub base() { 8 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
return if $self->{_error};
my $str = $self->_literal;
my $neg = $str =~ s/^\-//;
my $val = oct $str;
return $neg ? -$val : $val;
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow underscores straight through
return 1 if $char eq '_';
if ( $char =~ /\d/ ) {
# You cannot have 8s and 9s on octals
if ( $char eq '8' or $char eq '9' ) {
$t->{token}->{_error} = "Illegal character in octal number '$char'";
}
return 1;
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,164 @@
package PPI::Token::Number::Version;
=pod
=head1 NAME
PPI::Token::Number::Version - Token class for a byte-packed number
=head1 SYNOPSIS
$n = 1.1.0;
$n = 127.0.0.1;
$n = 10_000.10_000.10_000;
$n = v1.2.3.4
=head1 INHERITANCE
PPI::Token::Number::Version
isa PPI::Token::Number
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Number::Version> class is used for tokens that have
multiple decimal points. In truth, these aren't treated like numbers
at all by Perl, but they look like numbers to a parser.
=head1 METHODS
=cut
use strict;
use PPI::Token::Number ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Number";
=pod
=head2 base
Returns the base for the number: 256.
=cut
sub base() { 256 }
=pod
=head2 literal
Return the numeric value of this token.
=cut
sub literal {
my $self = shift;
my $content = $self->{content};
$content =~ s/^v//;
return join '', map { chr $_ } ( split /\./, $content );
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Allow digits
return 1 if $char =~ /\d/o;
if( $char eq '_' ) {
return 1 if $t->{token}{content} !~ /\.$/;
chop $t->{token}->{content};
$t->{class} = $t->{token}->set_class( 'Number::Float' )
if $t->{token}{content} !~ /\..+\./;
$t->_new_token('Operator', '.');
$t->_new_token('Word', '_');
return 0;
}
# Is this a second decimal point in a row? Then the '..' operator
if ( $char eq '.' ) {
if ( $t->{token}->{content} =~ /\.$/ ) {
# We have a .., which is an operator.
# Take the . off the end of the token..
# and finish it, then make the .. operator.
chop $t->{token}->{content};
$t->{class} = $t->{token}->set_class( 'Number::Float' )
if $t->{token}{content} !~ /\..+\./;
$t->_new_token('Operator', '..');
return 0;
} else {
return 1;
}
}
# Doesn't fit a special case, or is after the end of the token
# End of token.
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
sub __TOKENIZER__commit {
my $t = $_[1];
# Capture the rest of the token
pos $t->{line} = $t->{line_cursor};
# This was not a v-string after all (it's a word);
return PPI::Token::Word->__TOKENIZER__commit($t)
if $t->{line} !~ m/\G(v\d[_\d]*(?:\.\d[_\d]*)+|v\d[_\d]*\b)/gc;
my $content = $1;
# If there are no periods this could be a word starting with v\d
# Forced to be a word. Done.
return PPI::Token::Word->__TOKENIZER__commit($t)
if $content !~ /\./ and $t->__current_token_is_forced_word($content);
# This is a v-string
$t->{line_cursor} += length $content;
$t->_new_token( 'Number::Version', $content );
$t->_finalize_token->__TOKENIZER__on_char($t);
}
1;
=pod
=head1 BUGS
- Does not handle leading minus sign correctly. Should translate to a DashedWord.
See L<http://perlmonks.org/?node_id=574573>
-95.0.1.0 --> "-_\000\cA\000"
-96.0.1.0 --> Argument "`\0^A\0" isn't numeric in negation (-)
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2006 Chris Dolan.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,124 @@
package PPI::Token::Operator;
=pod
=head1 NAME
PPI::Token::Operator - Token class for operators
=head1 INHERITANCE
PPI::Token::Operator
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
# This is the list of valid operators
++ -- ** ! ~ + -
=~ !~ * / % x
<< >> lt gt le ge cmp ~~
== != <=> . .. ... ,
& | ^ && || //
? : **= += -= .= *= /=
%= x= &= |= ^= <<= >>= &&=
||= //= < > <= >= <> => ->
and or xor not eq ne <<>>
=head1 DESCRIPTION
All operators in PPI are created as C<PPI::Token::Operator> objects,
including the ones that may superficially look like a L<PPI::Token::Word>
object.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
use PPI::Singletons '%OPERATOR';
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $t = $_[1];
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
# Are we still an operator if we add the next character
my $content = $t->{token}->{content};
# special case for <<>> operator
if(length($content) < 4 &&
$content . substr( $t->{line}, $t->{line_cursor}, 4 - length($content) ) eq '<<>>') {
return 1;
}
return 1 if $OPERATOR{ $content . $char };
# Handle the special case of a .1234 decimal number
if ( $content eq '.' ) {
if ( $char =~ /^[0-9]$/ ) {
# This is a decimal number
$t->{class} = $t->{token}->set_class('Number::Float');
return $t->{class}->__TOKENIZER__on_char( $t );
}
}
# Handle the special case if we might be a here-doc
if ( $content eq '<<' ) {
pos $t->{line} = $t->{line_cursor};
# Either <<FOO or << 'FOO' or <<\FOO or
# <<~FOO or <<~ 'FOO' or <<~\FOO
### Is the zero-width look-ahead assertion really
### supposed to be there?
if ( $t->{line} =~ m/\G ~? (?: (?!\d)\w | \s*['"`] | \\\w ) /gcx ) {
# This is a here-doc.
# Change the class and move to the HereDoc's own __TOKENIZER__on_char method.
$t->{class} = $t->{token}->set_class('HereDoc');
return $t->{class}->__TOKENIZER__on_char( $t );
}
}
# Handle the special case of the null Readline
$t->{class} = $t->{token}->set_class('QuoteLike::Readline')
if $content eq '<>' or $content eq '<<>>';
# Finalize normally
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,159 @@
package PPI::Token::Pod;
=pod
=head1 NAME
PPI::Token::Pod - Sections of POD in Perl documents
=head1 INHERITANCE
PPI::Token::Pod
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A single C<PPI::Token::Pod> object represents a complete section of POD
documentation within a Perl document.
=head1 METHODS
This class provides some additional methods beyond those provided by its
L<PPI::Token> and L<PPI::Element> parent classes.
=cut
use strict;
use Params::Util qw{_INSTANCE};
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::Pod Methods
=pod
=head2 merge @podtokens
The C<merge> constructor takes a number of C<PPI::Token::Pod> objects,
and returns a new object that represents one combined POD block with
the content of all of them.
Returns a new C<PPI::Token::Pod> object, or C<undef> on error.
=cut
sub merge {
my $class = (! ref $_[0]) ? shift : return undef;
# Check there are no bad arguments
if ( grep { ! _INSTANCE($_, 'PPI::Token::Pod') } @_ ) {
return undef;
}
# Get the tokens, and extract the lines
my @content = ( map { [ $_->lines ] } @_ ) or return undef;
# Remove the leading =pod tags, trailing =cut tags, and any empty lines
# between them and the pod contents.
foreach my $pod ( @content ) {
# Leading =pod tag
if ( @$pod and $pod->[0] =~ /^=pod\b/o ) {
shift @$pod;
}
# Trailing =cut tag
if ( @$pod and $pod->[-1] =~ /^=cut\b/o ) {
pop @$pod;
}
# Leading and trailing empty lines
while ( @$pod and $pod->[0] eq '' ) { shift @$pod }
while ( @$pod and $pod->[-1] eq '' ) { pop @$pod }
}
# Remove any empty pod sections, and add the =pod and =cut tags
# for the merged pod back to it.
@content = ( [ '=pod' ], grep { @$_ } @content, [ '=cut' ] );
# Create the new object
$class->new( join "\n", map { join( "\n", @$_ ) . "\n" } @content );
}
=pod
=head2 lines
The C<lines> method takes the string of POD and breaks it into lines,
returning them as a list.
=cut
sub lines {
split /(?:\015{1,2}\012|\015|\012)/, $_[0]->{content};
}
#####################################################################
# PPI::Element Methods
### XS -> PPI/XS.xs:_PPI_Token_Pod__significant 0.900+
sub significant() { '' }
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_line_start {
my $t = $_[1];
# Add the line to the token first
$t->{token}->{content} .= $t->{line};
# Check the line to see if it is a =cut line
if ( $t->{line} =~ /^=(\w+)/ ) {
# End of the token
$t->_finalize_token if $1 eq 'cut';
}
0;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,115 @@
package PPI::Token::Prototype;
=pod
=head1 NAME
PPI::Token::Prototype - A subroutine prototype descriptor
=head1 INHERITANCE
PPI::Token::End
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
sub ($@) prototype;
=head1 DESCRIPTION
Although it sort of looks like a list or condition, a subroutine
prototype is a lot more like a string. Its job is to provide hints
to the perl compiler on what type of arguments a particular subroutine
expects, which the compiler uses to validate parameters at compile-time,
and allows programmers to use the functions without explicit parameter
parens.
Due to the rise of OO Perl coding, which ignores these prototypes, they
are most often used to allow for constant-like things, and to "extend"
the language and create things that act like keywords and core functions.
# Create something that acts like a constant
sub MYCONSTANT () { 10 }
# Create the "any" core-looking function
sub any (&@) { ... }
if ( any { $_->cute } @babies ) {
...
}
=head1 METHODS
This class provides one additional method beyond those defined by the
L<PPI::Token> and L<PPI::Element> parent classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
# Suck in until we find the closing paren (or the end of line)
pos $t->{line} = $t->{line_cursor};
die "regex should always match" if $t->{line} !~ m/\G(.*?(?:\)|$))/gc;
$t->{token}->{content} .= $1;
$t->{line_cursor} += length $1;
# Shortcut if end of line
return 0 unless $1 =~ /\)$/;
# Found the closing paren
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
=pod
=head2 prototype
The C<prototype> accessor returns the actual prototype pattern, stripped
of flanking parens and of all whitespace. This mirrors the behavior of
the Perl C<prototype> builtin function.
Note that stripping parens and whitespace means that the return of
C<prototype> can be an empty string.
=cut
sub prototype {
my $self = shift;
my $proto = $self->content;
$proto =~ s/(^\(|\)$|\s+)//g;
$proto;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,120 @@
package PPI::Token::Quote;
=pod
=head1 NAME
PPI::Token::Quote - String quote abstract base class
=head1 INHERITANCE
PPI::Token::Quote
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Quote> class is never instantiated, and simply
provides a common abstract base class for the four quote classes.
In PPI, a "quote" is limited to only the quote-like things that
themselves directly represent a string. (although this includes
double quotes with interpolated elements inside them, note that
L<String::InterpolatedVariables> allows to extract them).
The subclasses of C<PPI::Token::Quote> are:
=over 2
=item C<''> - L<PPI::Token::Quote::Single>
=item C<q{}> - L<PPI::Token::Quote::Literal>
=item C<""> - L<PPI::Token::Quote::Double>
=item C<qq{}> - L<PPI::Token::Quote::Interpolate>
=back
The names are hopefully obvious enough not to have to explain what
each class is here. See their respective pages for more details.
Please note that although the here-doc B<does> represent a literal
string, it is such a nasty piece of work that in L<PPI> it is given the
honor of its own token class (L<PPI::Token::HereDoc>).
=head1 METHODS
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::Quote Methods
=pod
=head2 string
The C<string> method is provided by all four ::Quote classes. It won't
get you the actual literal Perl value, but it will strip off the wrapping
of the quotes.
# The following all return foo from the ->string method
'foo'
"foo"
q{foo}
qq <foo>
=cut
#sub string {
# my $class = ref $_[0] || $_[0];
# die "$class does not implement method ->string";
#}
=pod
=head2 literal
The C<literal> method is provided by ::Quote::Literal and
::Quote::Single. This returns the value of the string as Perl sees
it: without the quote marks and with C<\\> and C<\'> resolved to C<\>
and C<'>.
The C<literal> method is not implemented by ::Quote::Double or
::Quote::Interpolate yet.
=cut
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,138 @@
package PPI::Token::Quote::Double;
=pod
=head1 NAME
PPI::Token::Quote::Double - A standard "double quote" token
=head1 INHERITANCE
PPI::Token::Quote::Double
isa PPI::Token::Quote
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::Quote::Double> object represents a double-quoted
interpolating string.
The string is treated as a single entity, L<PPI> will not try to
understand what is in the string during the parsing process.
=head1 METHODS
There are several methods available for C<PPI::Token::Quote::Double>, beyond
those provided by the parent L<PPI::Token::Quote>, L<PPI::Token> and
L<PPI::Element> classes.
=cut
use strict;
use Params::Util qw{_INSTANCE};
use PPI::Token::Quote ();
use PPI::Token::_QuoteEngine::Simple ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Simple
PPI::Token::Quote
};
#####################################################################
# PPI::Token::Quote::Double Methods
=pod
=head2 interpolations
The interpolations method checks to see if the double quote actually
contains any interpolated variables.
Returns true if the string contains interpolations, or false if not.
=cut
# Upgrade: Return the interpolated substrings.
# Upgrade: Returns parsed expressions.
sub interpolations {
# Are there any unescaped $things in the string
!! ($_[0]->content =~ /(?<!\\)(?:\\\\)*[\$\@]/);
}
=pod
=head2 simplify
For various reasons, some people find themselves compelled to have
their code in the simplest form possible.
The C<simplify> method will, if possible, modify a simple double-quoted
string token in place, turning it into the equivalent single-quoted
string. If the token is modified, it is reblessed into the
L<PPI::Token::Quote::Single> package.
Because the length of the content is not changed, there is no need
to call the document's C<flush_locations> method.
The object itself is returned as a convenience.
=cut
sub simplify {
# This only works on EXACTLY this class
my $self = _INSTANCE(shift, 'PPI::Token::Quote::Double') or return undef;
# Don't bother if there are characters that could complicate things
my $content = $self->content;
my $value = substr($content, 1, length($content) - 2);
return $self if $value =~ /[\\\$@\'\"]/;
# Change the token to a single string
$self->{content} = "'$value'";
bless $self, 'PPI::Token::Quote::Single';
}
#####################################################################
# PPI::Token::Quote Methods
sub string {
my $str = $_[0]->{content};
substr( $str, 1, length($str) - 2 );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,76 @@
package PPI::Token::Quote::Interpolate;
=pod
=head1 NAME
PPI::Token::Quote::Interpolate - The interpolation quote-like operator
=head1 INHERITANCE
PPI::Token::Quote::Interpolate
isa PPI::Token::Quote
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::Quote::Interpolate> object represents a single
interpolation quote-like operator, such as C<qq{$foo bar $baz}>.
=head1 METHODS
There are no methods available for C<PPI::Token::Quote::Interpolate>
beyond those provided by the parent L<PPI::Token::Quote>, L<PPI::Token> and
L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Quote ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::Quote
};
#####################################################################
# PPI::Token::Quote Methods
sub string {
my $self = shift;
my @sections = $self->_sections;
my $str = $sections[0];
substr( $self->{content}, $str->{position}, $str->{size} );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,80 @@
package PPI::Token::Quote::Literal;
=pod
=head1 NAME
PPI::Token::Quote::Literal - The literal quote-like operator
=head1 INHERITANCE
PPI::Token::Quote::Literal
isa PPI::Token::Quote
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::Quote::Literal> object represents a single literal
quote-like operator, such as C<q{foo bar}>.
=head1 METHODS
There are no methods available for C<PPI::Token::Quote::Literal> beyond
those provided by the parent L<PPI::Token::Quote>, L<PPI::Token> and
L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Quote ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::Quote
};
#####################################################################
# PPI::Token::Quote Methods
sub string {
my $self = shift;
my @sections = $self->_sections;
my $str = $sections[0];
substr( $self->{content}, $str->{position}, $str->{size} );
}
# Use the same implementation as another module
*literal = *PPI::Token::Quote::Single::literal;
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,93 @@
package PPI::Token::Quote::Single;
=pod
=head1 NAME
PPI::Token::Quote::Single - A 'single quote' token
=head1 INHERITANCE
PPI::Token::Quote::Single
isa PPI::Token::Quote
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
'This is a single quote'
q{This is a literal, but NOT a single quote}
=head1 DESCRIPTION
A C<PPI::Token::Quote::Single> object represents a single quoted string
literal.
=head1 METHODS
There are no methods available for C<PPI::Token::Quote::Single> beyond
those provided by the parent L<PPI::Token::Quote>, L<PPI::Token> and
L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Quote ();
use PPI::Token::_QuoteEngine::Simple ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Simple
PPI::Token::Quote
};
#####################################################################
# PPI::Token::Quote Methods
sub string {
my $str = $_[0]->{content};
substr( $str, 1, length($str) - 2 );
}
my %UNESCAPE = (
"\\'" => "'",
"\\\\" => "\\",
);
sub literal {
# Unescape \\ and \' ONLY
my $str = $_[0]->string;
$str =~ s/(\\.)/$UNESCAPE{$1} || $1/ge;
return $str;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,77 @@
package PPI::Token::QuoteLike;
=pod
=head1 NAME
PPI::Token::QuoteLike - Quote-like operator abstract base class
=head1 INHERITANCE
PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::QuoteLike> class is never instantiated, and simply
provides a common abstract base class for the five quote-like operator
classes. In PPI, a "quote-like" is the set of quote-like things that
exclude the string quotes and regular expressions.
The subclasses of C<PPI::Token::QuoteLike> are:
=over 2
=item qw{} - L<PPI::Token::QuoteLike::Words>
=item `` - L<PPI::Token::QuoteLike::Backtick>
=item qx{} - L<PPI::Token::QuoteLike::Command>
=item qr// - L<PPI::Token::QuoteLike::Regexp>
=item <FOO> - L<PPI::Token::QuoteLike::Readline>
=back
The names are hopefully obvious enough not to have to explain what
each class is. See their pages for more details.
You may note that the backtick and command quote-like are treated
separately, even though they do the same thing. This is intentional,
as the inherit from and are processed by two different parts of the
PPI's quote engine.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,62 @@
package PPI::Token::QuoteLike::Backtick;
=pod
=head1 NAME
PPI::Token::QuoteLike::Backtick - A `backticks` command token
=head1 INHERITANCE
PPI::Token::QuoteLike::Backtick
isa PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::QuoteLike::Backtick> object represents a command output
capturing quote.
=head1 METHODS
There are no methods available for C<PPI::Token::QuoteLike::Backtick>
beyond those provided by the parent L<PPI::Token::QuoteLike>, L<PPI::Token>
and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::QuoteLike ();
use PPI::Token::_QuoteEngine::Simple ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Simple
PPI::Token::QuoteLike
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,62 @@
package PPI::Token::QuoteLike::Command;
=pod
=head1 NAME
PPI::Token::QuoteLike::Command - The command quote-like operator
=head1 INHERITANCE
PPI::Token::QuoteLike::Command
isa PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::QuoteLike::Command> object represents a command output
capturing quote-like operator.
=head1 METHODS
There are no methods available for C<PPI::Token::QuoteLike::Command>
beyond those provided by the parent L<PPI::Token::QuoteLike>, L<PPI::Token>
and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::QuoteLike ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::QuoteLike
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,71 @@
package PPI::Token::QuoteLike::Readline;
=pod
=head1 NAME
PPI::Token::QuoteLike::Readline - The readline quote-like operator
=head1 INHERITANCE
PPI::Token::QuoteLike::Readline
isa PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<readline> quote-like operator is used to read either a single
line from a file, or all the lines from a file, as follows.
# Read in a single line
$line = <FILE>;
# From a scalar handle
$line = <$filehandle>;
# Read all the lines
@lines = <FILE>;
=head1 METHODS
There are no methods available for C<PPI::Token::QuoteLike::Readline>
beyond those provided by the parent L<PPI::Token::QuoteLike>, L<PPI::Token>
and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::QuoteLike ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::QuoteLike
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,126 @@
package PPI::Token::QuoteLike::Regexp;
=pod
=head1 NAME
PPI::Token::QuoteLike::Regexp - Regexp constructor quote-like operator
=head1 INHERITANCE
PPI::Token::QuoteLike::Regexp
isa PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::QuoteLike::Regexp> object represents the quote-like
operator used to construct anonymous L<Regexp> objects, as follows.
# Create a Regexp object for a module filename
my $module = qr/\.pm$/;
=head1 METHODS
The following methods are provided by this class,
beyond those provided by the parent L<PPI::Token::QuoteLike>,
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::QuoteLike ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::QuoteLike
};
#####################################################################
# PPI::Token::QuoteLike::Regexp Methods
=pod
=head2 get_match_string
The C<get_match_string> method returns the portion of the string that
will be compiled into the match portion of the regexp.
=cut
sub get_match_string {
return $_[0]->_section_content( 0 );
}
=pod
=head2 get_substitute_string
The C<get_substitute_string> method always returns C<undef>, since
the C<qr{}> construction provides no substitution string. This method
is provided for orthogonality with C<PPI::Token::Regexp>.
=cut
sub get_substitute_string {
return undef;
}
=pod
=head2 get_modifiers
The C<get_modifiers> method returns the modifiers that will be
compiled into the regexp.
=cut
sub get_modifiers {
return $_[0]->_modifiers();
}
=pod
=head2 get_delimiters
The C<get_delimiters> method returns the delimiters of the string as an
array. The first and only element is the delimiters of the string to be
compiled into a match string.
=cut
sub get_delimiters {
return $_[0]->_delimiters();
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,88 @@
package PPI::Token::QuoteLike::Words;
=pod
=head1 NAME
PPI::Token::QuoteLike::Words - Word list constructor quote-like operator
=head1 INHERITANCE
PPI::Token::QuoteLike::Words
isa PPI::Token::QuoteLike
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::QuoteLike::Words> object represents a quote-like operator
that acts as a constructor for a list of words.
# Create a list for a significant chunk of the alphabet
my @list = qw{a b c d e f g h i j k l};
=head1 METHODS
=cut
use strict;
use PPI::Token::QuoteLike ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::QuoteLike
};
=pod
=head2 literal
Returns the words contained as a list. Note that this method does not check the
context that the token is in; it always returns the list and not merely
the last element if the token is in scalar context.
=cut
sub literal {
my ( $self ) = @_;
my $content = $self->_section_content(0);
return if !defined $content;
# Undo backslash escaping of '\', the left delimiter,
# and the right delimiter. The right delimiter will
# only exist with paired delimiters: qw() qw[] qw<> qw{}.
my ( $left, $right ) = ( $self->_delimiters, '', '' );
$content =~ s/\\([\Q$left$right\\\E])/$1/g;
my @words = split ' ', $content;
return @words;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,136 @@
package PPI::Token::Regexp;
=pod
=head1 NAME
PPI::Token::Regexp - Regular expression abstract base class
=head1 INHERITANCE
PPI::Token::Regexp
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Regexp> class is never instantiated, and simply
provides a common abstract base class for the three regular expression
classes. These being:
=over 2
=item m// - L<PPI::Token::Regexp::Match>
=item s/// - L<PPI::Token::Regexp::Substitute>
=item tr/// - L<PPI::Token::Regexp::Transliterate>
=back
The names are hopefully obvious enough not to have to explain what
each class is. See their pages for more details.
To save some confusion, it's worth pointing out here that C<qr//> is
B<not> a regular expression (which PPI takes to mean something that
will actually examine or modify a string), but rather a quote-like
operator that acts as a constructor for compiled L<Regexp> objects.
=head1 METHODS
The following methods are inherited by this class' offspring:
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::Regexp Methods
=pod
=head2 get_match_string
The C<get_match_string> method returns the portion of the regexp that
performs the match.
=cut
sub get_match_string {
return $_[0]->_section_content( 0 );
}
=pod
=head2 get_substitute_string
The C<get_substitute_string> method returns the portion of the regexp
that is substituted for the match, if any. If the regexp does not
substitute, C<undef> is returned.
=cut
sub get_substitute_string {
return $_[0]->_section_content( 1 );
}
=pod
=head2 get_modifiers
The C<get_modifiers> method returns the modifiers of the regexp.
=cut
sub get_modifiers {
return $_[0]->_modifiers();
}
=pod
=head2 get_delimiters
The C<get_delimiters> method returns the delimiters of the regexp as
an array. The first element is the delimiters of the match string, and
the second element (if any) is the delimiters of the substitute string
(if any).
=cut
sub get_delimiters {
return $_[0]->_delimiters();
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,76 @@
package PPI::Token::Regexp::Match;
=pod
=head1 NAME
PPI::Token::Regexp::Match - A standard pattern match regex
=head1 INHERITANCE
PPI::Token::Regexp::Match
isa PPI::Token::Regexp
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
$text =~ m/match regexp/;
$text =~ /match regexp/;
=head1 DESCRIPTION
A C<PPI::Token::Regexp::Match> object represents a single match regular
expression. Just to be doubly clear, here are things that are and
B<aren't> considered a match regexp.
# Is a match regexp
/This is a match regexp/;
m/Old McDonald had a farm/eieio;
# These are NOT match regexp
qr/This is a regexp quote-like operator/;
s/This is a/replace regexp/;
=head1 METHODS
There are no methods available for C<PPI::Token::Regexp::Match> beyond
those provided by the parent L<PPI::Token::Regexp>, L<PPI::Token> and
L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Regexp ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::Regexp
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,66 @@
package PPI::Token::Regexp::Substitute;
=pod
=head1 NAME
PPI::Token::Regexp::Substitute - A match and replace regular expression token
=head1 INHERITANCE
PPI::Token::Regexp::Substitute
isa PPI::Token::Regexp
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
$text =~ s/find/$replace/;
=head1 DESCRIPTION
A C<PPI::Token::Regexp::Substitute> object represents a single substitution
regular expression.
=head1 METHODS
There are no methods available for C<PPI::Token::Regexp::Substitute>
beyond those provided by the parent L<PPI::Token::Regexp>, L<PPI::Token>
and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Regexp ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::Regexp
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,70 @@
package PPI::Token::Regexp::Transliterate;
=pod
=head1 NAME
PPI::Token::Regexp::Transliterate - A transliteration regular expression token
=head1 INHERITANCE
PPI::Token::Regexp::Transliterate
isa PPI::Token::Regexp
isa PPI::Token
isa PPI::Element
=head1 SYNOPSIS
$text =~ tr/abc/xyz/;
=head1 DESCRIPTION
A C<PPI::Token::Regexp::Transliterate> object represents a single
transliteration regular expression.
I'm afraid you'll have to excuse the ridiculously long class name, but
when push came to shove I ended up going for pedantically correct
names for things (practically cut and paste from the various docs).
=head1 METHODS
There are no methods available for C<PPI::Token::Regexp::Transliterate>
beyond those provided by the parent L<PPI::Token::Regexp>, L<PPI::Token>
and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token::Regexp ();
use PPI::Token::_QuoteEngine::Full ();
our $VERSION = '1.270'; # VERSION
our @ISA = qw{
PPI::Token::_QuoteEngine::Full
PPI::Token::Regexp
};
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,64 @@
package PPI::Token::Separator;
=pod
=head1 NAME
PPI::Token::Separator - The __DATA__ and __END__ tags
=head1 INHERITANCE
PPI::Token::Separator
isa PPI::Token::Word
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
Although superficially looking like a normal L<PPI::Token::Word> object,
when the C<__DATA__> and C<__END__> compiler tags appear at the beginning of
a line (on supposedly) their own line, these tags become file section
separators.
The indicate that the time for Perl code is over, and the rest of the
file is dedicated to something else (data in the case of C<__DATA__>) or
to nothing at all (in the case of C<__END__>).
=head1 METHODS
This class has no methods beyond what is provided by its
L<PPI::Token::Word>, L<PPI::Token> and L<PPI::Element>
parent classes.
=cut
use strict;
use PPI::Token::Word ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token::Word";
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,209 @@
package PPI::Token::Structure;
=pod
=head1 NAME
PPI::Token::Structure - Token class for characters that define code structure
=head1 INHERITANCE
PPI::Token::Structure
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Structure> class is used for tokens that control the
general tree structure or code.
This consists of seven characters. These are the six brace characters from
the "round", "curly" and "square" pairs, plus the semi-colon statement
separator C<;>.
=head1 METHODS
This class has no methods beyond what is provided by its
L<PPI::Token> and L<PPI::Element> parent classes.
=cut
use strict;
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
# Set the matching braces, done as an array
# for slightly faster lookups.
my %MATCH = (
ord '{' => '}',
ord '}' => '{',
ord '[' => ']',
ord ']' => '[',
ord '(' => ')',
ord ')' => '(',
);
my %OPENS = (
ord '{' => 1,
ord '[' => 1,
ord '(' => 1,
);
my %CLOSES = (
ord '}' => 1,
ord ']' => 1,
ord ')' => 1,
);
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
# Structures are one character long, always.
# Finalize and process again.
$_[1]->_finalize_token->__TOKENIZER__on_char( $_[1] );
}
sub __TOKENIZER__commit {
my $t = $_[1];
$t->_new_token( 'Structure', substr( $t->{line}, $t->{line_cursor}, 1 ) );
$t->_finalize_token;
0;
}
#####################################################################
# Lexer Methods
# For a given brace, find its opposing pair
sub __LEXER__opposite {
$MATCH{ord $_[0]->{content}};
}
#####################################################################
# PPI::Element Methods
# There is a unusual situation in regards to "siblings".
#
# As an Element, braces sit outside the normal tree structure, and in
# this context they NEVER have siblings.
#
# However, as tokens they DO have siblings.
#
# As such, we need special versions of _all_ of the sibling methods to
# handle this.
#
# Statement terminators do not have these problems, and for them sibling
# calls work as normal, and so they can just be passed upwards.
sub next_sibling {
return $_[0]->SUPER::next_sibling if $_[0]->{content} eq ';';
return '';
}
sub snext_sibling {
return $_[0]->SUPER::snext_sibling if $_[0]->{content} eq ';';
return '';
}
sub previous_sibling {
return $_[0]->SUPER::previous_sibling if $_[0]->{content} eq ';';
return '';
}
sub sprevious_sibling {
return $_[0]->SUPER::sprevious_sibling if $_[0]->{content} eq ';';
return '';
}
sub next_token {
my $self = shift;
return $self->SUPER::next_token if $self->{content} eq ';';
my $structure = $self->parent or return '';
# If this is an opening brace, descend down into our parent
# structure, if it has children.
if ( $OPENS{ ord $self->{content} } ) {
my $child = $structure->child(0);
if ( $child ) {
# Decend deeper, or return if it is a token
return $child->isa('PPI::Token') ? $child : $child->first_token;
} elsif ( $structure->finish ) {
# Empty structure, so next is closing brace
return $structure->finish;
}
# Anything that slips through to here is a structure
# with an opening brace, but no closing brace, so we
# just have to go with it, and continue as we would
# if we started with a closing brace.
}
# We can use the default implement, if we call it from the
# parent structure of the closing brace.
$structure->next_token;
}
sub previous_token {
my $self = shift;
return $self->SUPER::previous_token if $self->{content} eq ';';
my $structure = $self->parent or return '';
# If this is a closing brace, descend down into our parent
# structure, if it has children.
if ( $CLOSES{ ord $self->{content} } ) {
my $child = $structure->child(-1);
if ( $child ) {
# Decend deeper, or return if it is a token
return $child->isa('PPI::Token') ? $child : $child->last_token;
} elsif ( $structure->start ) {
# Empty structure, so next is closing brace
return $structure->start;
}
# Anything that slips through to here is a structure
# with a closing brace, but no opening brace, so we
# just have to go with it, and continue as we would
# if we started with an opening brace.
}
# We can use the default implement, if we call it from the
# parent structure of the closing brace.
$structure->previous_token;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,241 @@
package PPI::Token::Symbol;
=pod
=head1 NAME
PPI::Token::Symbol - A token class for variables and other symbols
=head1 INHERITANCE
PPI::Token::Symbol
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
The C<PPI::Token::Symbol> class is used to cover all tokens that represent
variables and other things that start with a sigil.
=head1 METHODS
This class has several methods beyond what is provided by its
L<PPI::Token> and L<PPI::Element> parent classes.
Most methods are provided to help work out what the object is actually
pointing at, rather than what it might appear to be pointing at.
=cut
use strict;
use Params::Util qw{_INSTANCE};
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# PPI::Token::Symbol Methods
=pod
=head2 canonical
The C<canonical> method returns a normalized, canonical version of the
symbol.
For example, it converts C<$ ::foo'bar::baz> to C<$main::foo::bar::baz>.
This does not fully resolve the symbol, but merely removes syntax
variations.
=cut
sub canonical {
my $symbol = shift->content;
$symbol =~ s/\s+//;
$symbol =~ s/\'/::/g;
$symbol =~ s/(?<=[\$\@\%\&\*])::/main::/;
$symbol;
}
=pod
=head2 symbol
The C<symbol> method returns the ACTUAL symbol this token refers to.
A token of C<$foo> might actually be referring to C<@foo>, if it is found
in the form C<$foo[1]>.
This method attempts to resolve these issues to determine the actual
symbol.
Returns the symbol as a string.
=cut
my %cast_which_trumps_braces = map { $_ => 1 } qw{ $ @ % };
sub symbol {
my $self = shift;
my $symbol = $self->canonical;
# Immediately return the cases where it can't be anything else
my $type = substr( $symbol, 0, 1 );
return $symbol if $type eq '&';
# Unless the next significant Element is a structure, it's correct.
my $after = $self->snext_sibling;
return $symbol unless _INSTANCE($after, 'PPI::Structure');
# Process the rest for cases where it might actually be something else
my $braces = $after->braces;
return $symbol unless defined $braces;
if ( $type eq '$' ) {
# If it is cast to '$' or '@', that trumps any braces
my $before = $self->sprevious_sibling;
return $symbol if $before &&
$before->isa( 'PPI::Token::Cast' ) &&
$cast_which_trumps_braces{ $before->content };
# Otherwise the braces rule
substr( $symbol, 0, 1, '@' ) if $braces eq '[]';
substr( $symbol, 0, 1, '%' ) if $braces eq '{}';
} elsif ( $type eq '@' ) {
substr( $symbol, 0, 1, '%' ) if $braces eq '{}';
} elsif ( $type eq '%' ) {
substr( $symbol, 0, 1, '@' ) if $braces eq '[]';
}
$symbol;
}
=pod
=head2 raw_type
The C<raw_type> method returns the B<apparent> type of the symbol in the
form of its sigil.
Returns the sigil as a string.
=cut
sub raw_type {
substr( $_[0]->content, 0, 1 );
}
=pod
=head2 symbol_type
The C<symbol_type> method returns the B<actual> type of the symbol in the
form of its sigil.
Returns the sigil as a string.
=cut
sub symbol_type {
substr( $_[0]->symbol, 0, 1 );
}
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my $t = $_[1];
# Suck in till the end of the symbol
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G([\w:\']+)/gc ) {
$t->{token}->{content} .= $1;
$t->{line_cursor} += length $1;
}
# Handle magic things
my $content = $t->{token}->{content};
if ( $content eq '@_' or $content eq '$_' ) {
$t->{class} = $t->{token}->set_class( 'Magic' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Shortcut for most of the X:: symbols
if ( $content eq '$::' ) {
# May well be an alternate form of a Magic
my $nextchar = substr( $t->{line}, $t->{line_cursor}, 1 );
if ( $nextchar eq '|' ) {
$t->{token}->{content} .= $nextchar;
$t->{line_cursor}++;
$t->{class} = $t->{token}->set_class( 'Magic' );
}
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
if ( $content =~ /^[\$%*@&]::(?:[^\w]|$)/ ) {
my $current = substr( $content, 0, 3, '' );
$t->{token}->{content} = $current;
$t->{line_cursor} -= length( $content );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
if ( $content =~ /^(?:\$|\@)\d+/ ) {
$t->{class} = $t->{token}->set_class( 'Magic' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Trim off anything we oversucked...
$content =~ /^(
[\$@%&*]
(?: : (?!:) | # Allow single-colon non-magic variables
(?: \w+ | \' (?!\d) \w+ | \:: \w+ )
(?:
# Allow both :: and ' in namespace separators
(?: \' (?!\d) \w+ | \:: \w+ )
)*
(?: :: )? # Technically a compiler-magic hash, but keep it here
)
)/x or return undef;
unless ( length $1 eq length $content ) {
$t->{line_cursor} += length($1) - length($content);
$t->{token}->{content} = $1;
}
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,479 @@
package PPI::Token::Unknown;
=pod
=head1 NAME
PPI::Token::Unknown - Token of unknown or as-yet undetermined type
=head1 INHERITANCE
PPI::Token::Unknown
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
Object of the type C<PPI::Token::Unknown> exist primarily inside the
tokenizer, where they are temporarily brought into existing for a very
short time to represent a token that could be one of a number of types.
Generally, they only exist for a character or two, after which they are
resolved and converted into the correct type. For an object of this type
to survive the parsing process is considered a major bug.
Please report any C<PPI::Token::Unknown> you encounter in a L<PPI::Document>
object as a bug.
=cut
use strict;
use PPI::Token ();
use PPI::Exception ();
use PPI::Singletons qw' %MAGIC $CURLY_SYMBOL ';
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
#####################################################################
# Tokenizer Methods
sub __TOKENIZER__on_char {
my ( $self, $t ) = @_; # Self and Tokenizer
my $c = $t->{token}->{content}; # Current token
my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character
# Now, we split on the different values of the current content
if ( $c eq '*' ) {
# Is it a number?
if ( $char =~ /\d/ ) {
# bitwise operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
if ( $char =~ /[\w:]/ ) {
# Symbol (unless the thing before it is a number
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
$t->{class} = $t->{token}->set_class( 'Symbol' );
return 1;
}
}
if ( $char eq '{' ) {
# Get rest of line
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# control-character symbol (e.g. *{^_Foo})
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
}
# Postfix dereference: ->**
if ( $char eq '*' ) {
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return 1;
}
}
if ( $char eq '*' || $char eq '=' ) {
# Power operator '**' or mult-assign '*='
$t->{class} = $t->{token}->set_class( 'Operator' );
return 1;
}
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
} elsif ( $c eq '$' ) {
# Postfix dereference: ->$* ->$#*
if ( $char eq '*' || $char eq '#' ) {
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return 1;
}
}
if ( $char =~ /[a-z_]/i ) {
# Symbol
$t->{class} = $t->{token}->set_class( 'Symbol' );
return 1;
}
if ( $MAGIC{ $c . $char } ) {
# Magic variable
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
if ( $char eq '{' ) {
# Get rest of line
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# control-character symbol (e.g. ${^MATCH})
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
}
# Must be a cast
$t->{class} = $t->{token}->set_class( 'Cast' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
} elsif ( $c eq '@' ) {
# Postfix dereference: ->@*
if ( $char eq '*' ) {
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return 1;
}
}
if ( $char =~ /[\w:]/ ) {
# Symbol
$t->{class} = $t->{token}->set_class( 'Symbol' );
return 1;
}
if ( $MAGIC{ $c . $char } ) {
# Magic variable
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
if ( $char eq '{' ) {
# Get rest of line
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# control-character symbol (e.g. @{^_Foo})
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
}
# Must be a cast
$t->{class} = $t->{token}->set_class( 'Cast' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
} elsif ( $c eq '%' ) {
# Postfix dereference: ->%* ->%[...]
if ( $char eq '*' || $char eq '[' ) {
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
if ( $char eq '*' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return 1;
}
if ( $char eq '[' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
}
}
# Is it a number?
if ( $char =~ /\d/ ) {
# bitwise operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# Is it a magic variable?
if ( $char eq '^' || $MAGIC{ $c . $char } ) {
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
if ( $char =~ /[\w:]/ ) {
# Symbol (unless the thing before it is a number
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
$t->{class} = $t->{token}->set_class( 'Symbol' );
return 1;
}
}
if ( $char eq '{' ) {
# Get rest of line
pos $t->{line} = $t->{line_cursor} + 1;
if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
# control-character symbol (e.g. %{^_Foo})
$t->{class} = $t->{token}->set_class( 'Magic' );
return 1;
}
}
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
# Probably the mod operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->{class}->__TOKENIZER__on_char( $t );
} elsif ( $c eq '&' ) {
# Postfix dereference: ->&*
if ( $char eq '*' ) {
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( $prev and $prev->isa('PPI::Token::Operator') and $prev->content eq '->' ) {
$t->{class} = $t->{token}->set_class( 'Cast' );
return 1;
}
}
# Is it a number?
if ( $char =~ /\d/ ) {
# bitwise operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
if ( $char =~ /[\w:]/ ) {
# Symbol (unless the thing before it is a number
my ( $prev ) = $t->_previous_significant_tokens(1);
if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
$t->{class} = $t->{token}->set_class( 'Symbol' );
return 1;
}
}
return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);
# Probably the binary and operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->{class}->__TOKENIZER__on_char( $t );
} elsif ( $c eq '-' ) {
if ( $char =~ /\d/o ) {
# Number
$t->{class} = $t->{token}->set_class( 'Number' );
return 1;
}
if ( $char eq '.' ) {
# Number::Float
$t->{class} = $t->{token}->set_class( 'Number::Float' );
return 1;
}
if ( $char =~ /[a-zA-Z]/ ) {
$t->{class} = $t->{token}->set_class( 'DashedWord' );
return 1;
}
# The numeric negative operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->{class}->__TOKENIZER__on_char( $t );
} elsif ( $c eq ':' ) {
if ( $char eq ':' ) {
# ::foo style bareword
$t->{class} = $t->{token}->set_class( 'Word' );
return 1;
}
# Now, : acts very very differently in different contexts.
# Mainly, we need to find out if this is a subroutine attribute.
# We'll leave a hint in the token to indicate that, if it is.
if ( $self->__TOKENIZER__is_an_attribute( $t ) ) {
# This : is an attribute indicator
$t->{class} = $t->{token}->set_class( 'Operator' );
$t->{token}->{_attribute} = 1;
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# It MIGHT be a label, but it's probably the ?: trinary operator
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->{class}->__TOKENIZER__on_char( $t );
}
# erm...
PPI::Exception->throw('Unknown value in PPI::Token::Unknown token');
}
sub _is_cast_or_op {
my ( $self, $char ) = @_;
return 1 if $char eq '$';
return 1 if $char eq '@';
return 1 if $char eq '%';
return 1 if $char eq '*';
return 1 if $char eq '{';
return;
}
sub _as_cast_or_op {
my ( $self, $t ) = @_;
my $class = _cast_or_op( $t );
$t->{class} = $t->{token}->set_class( $class );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
sub _prev_significant_w_cursor {
my ( $tokens, $cursor, $extra_check ) = @_;
while ( $cursor >= 0 ) {
my $token = $tokens->[ $cursor-- ];
next if !$token->significant;
next if $extra_check and !$extra_check->($token);
return ( $token, $cursor );
}
return ( undef, $cursor );
}
# Operator/operand-sensitive, multiple or GLOB cast
sub _cast_or_op {
my ( $t ) = @_;
my $tokens = $t->{tokens};
my $cursor = scalar( @$tokens ) - 1;
my $token;
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Cast' if !$token; # token was first in the document
if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) {
# Scan the token stream backwards an arbitrarily long way,
# looking for the matching opening curly brace.
my $structure_depth = 1;
( $token, $cursor ) = _prev_significant_w_cursor(
$tokens, $cursor,
sub {
my ( $token ) = @_;
return if !$token->isa( 'PPI::Token::Structure' );
if ( $token eq '}' ) {
$structure_depth++;
return;
}
if ( $token eq '{' ) {
$structure_depth--;
return if $structure_depth;
}
return 1;
}
);
return 'Operator' if !$token; # no matching '{', probably an unbalanced '}'
# Scan past any whitespace
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Operator' if !$token; # Document began with what must be a hash constructor.
return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript
my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @;
return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript
my $content = $token->content;
my $produces_or_wants_value =
( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) );
return $produces_or_wants_value ? 'Operator' : 'Cast';
}
my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @;
return 'Cast'
if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content }
or $token->isa( 'PPI::Token::Cast' )
or $token->isa( 'PPI::Token::Operator' )
or $token->isa( 'PPI::Token::Label' );
return 'Operator' if !$token->isa( 'PPI::Token::Word' );
( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
return 'Cast' if !$token || $token->content ne '->';
return 'Operator';
}
# Are we at a location where a ':' would indicate a subroutine attribute
sub __TOKENIZER__is_an_attribute {
my $t = $_[1]; # Tokenizer object
my @tokens = $t->_previous_significant_tokens(3);
my $p0 = $tokens[0];
return '' if not $p0;
# If we just had another attribute, we are also an attribute
return 1 if $p0->isa('PPI::Token::Attribute');
# If we just had a prototype, then we are an attribute
return 1 if $p0->isa('PPI::Token::Prototype');
# Other than that, we would need to have had a bareword
return '' unless $p0->isa('PPI::Token::Word');
# We could be an anonymous subroutine
if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' ) {
return 1;
}
# Or, we could be a named subroutine
my $p1 = $tokens[1];
my $p2 = $tokens[2];
if (
$p1
and
$p1->isa('PPI::Token::Word')
and
$p1->content eq 'sub'
and (
not $p2
or
$p2->isa('PPI::Token::Structure')
or (
$p2->isa('PPI::Token::Whitespace')
and
$p2->content eq ''
)
)
) {
return 1;
}
# We aren't an attribute
'';
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,454 @@
package PPI::Token::Whitespace;
=pod
=head1 NAME
PPI::Token::Whitespace - Tokens representing ordinary white space
=head1 INHERITANCE
PPI::Token::Whitespace
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
As a full "round-trip" parser, PPI records every last byte in a
file and ensure that it is included in the L<PPI::Document> object.
This even includes whitespace. In fact, Perl documents are seen
as "floating in a sea of whitespace", and thus any document will
contain vast quantities of C<PPI::Token::Whitespace> objects.
For the most part, you shouldn't notice them. Or at least, you
shouldn't B<have> to notice them.
This means doing things like consistently using the "S for significant"
series of L<PPI::Node> and L<PPI::Element> methods to do things.
If you want the nth child element, you should be using C<schild> rather
than C<child>, and likewise C<snext_sibling>, C<sprevious_sibling>, and
so on and so forth.
=head1 METHODS
Again, for the most part you should really B<not> need to do anything
very significant with whitespace.
But there are a couple of convenience methods provided, beyond those
provided by the parent L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use Clone ();
use PPI::Token ();
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
=pod
=head2 null
Because L<PPI> sees documents as sitting on a sort of substrate made of
whitespace, there are a couple of corner cases that get particularly
nasty if they don't find whitespace in certain places.
Imagine walking down the beach to go into the ocean, and then quite
unexpectedly falling off the side of the planet. Well it's somewhat
equivalent to that, including the whole screaming death bit.
The C<null> method is a convenience provided to get some internals
out of some of these corner cases.
Specifically it create a whitespace token that represents nothing,
or at least the null string C<''>. It's a handy way to have some
"whitespace" right where you need it, without having to have any
actual characters.
=cut
my $null;
sub null {
$null ||= $_[0]->new('');
Clone::clone($null);
}
### XS -> PPI/XS.xs:_PPI_Token_Whitespace__significant 0.900+
sub significant() { '' }
=pod
=head2 tidy
C<tidy> is a convenience method for removing unneeded whitespace.
Specifically, it removes any whitespace from the end of a line.
Note that this B<doesn't> include POD, where you may well need
to keep certain types of whitespace. The entire POD chunk lives
in its own L<PPI::Token::Pod> object.
=cut
sub tidy {
$_[0]->{content} =~ s/^\s+?(?>\n)//;
1;
}
#####################################################################
# Parsing Methods
# Build the class and commit maps
my %COMMITMAP = (
map( { ord $_ => 'PPI::Token::Word' } 'a' .. 'u', 'A' .. 'Z', qw" w y z _ " ), # no v or x
map( { ord $_ => 'PPI::Token::Structure' } qw" ; [ ] { } ) " ),
ord '#' => 'PPI::Token::Comment',
ord 'v' => 'PPI::Token::Number::Version',
);
my %CLASSMAP = (
map( { ord $_ => 'Number' } 0 .. 9 ),
map( { ord $_ => 'Operator' } qw" = ? | + > . ! ~ ^ " ),
map( { ord $_ => 'Unknown' } qw" * $ @ & : % " ),
ord ',' => 'PPI::Token::Operator',
ord "'" => 'Quote::Single',
ord '"' => 'Quote::Double',
ord '`' => 'QuoteLike::Backtick',
ord '\\' => 'Cast',
ord '_' => 'Word',
9 => 'Whitespace', # A horizontal tab
10 => 'Whitespace', # A newline
12 => 'Whitespace', # A form feed
13 => 'Whitespace', # A carriage return
32 => 'Whitespace', # A normal space
);
# Words (functions and keywords) after which a following / is
# almost certainly going to be a regex
my %MATCHWORD = map { $_ => 1 } qw{
return
split
if
unless
grep
map
};
sub __TOKENIZER__on_line_start {
my $t = $_[1];
my $line = $t->{line};
# Can we classify the entire line in one go
if ( $line =~ /^\s*$/ ) {
# A whitespace line
$t->_new_token( 'Whitespace', $line );
return 0;
} elsif ( $line =~ /^\s*#/ ) {
# A comment line
$t->_new_token( 'Comment', $line );
$t->_finalize_token;
return 0;
} elsif ( $line =~ /^=(\w+)/ ) {
# A Pod tag... change to pod mode
$t->_new_token( 'Pod', $line );
if ( $1 eq 'cut' ) {
# This is an error, but one we'll ignore
# Don't go into Pod mode, since =cut normally
# signals the end of Pod mode
} else {
$t->{class} = 'PPI::Token::Pod';
}
return 0;
} elsif ( $line =~ /^use v6\-alpha\;/ ) {
# Indicates a Perl 6 block. Make the initial
# implementation just suck in the entire rest of the
# file.
my @perl6;
while ( 1 ) {
my $line6 = $t->_get_line;
last unless defined $line6;
push @perl6, $line6;
}
push @{ $t->{perl6} }, join '', @perl6;
# We only sucked in the block, we don't actually do
# anything to the "use v6..." line. So return as if
# we didn't find anything at all.
return 1;
}
1;
}
sub __TOKENIZER__on_char {
my $t = $_[1];
my $c = substr $t->{line}, $t->{line_cursor}, 1;
my $char = ord $c;
# Do we definitely know what something is?
return $COMMITMAP{$char}->__TOKENIZER__commit($t) if $COMMITMAP{$char};
# Handle the simple option first
return $CLASSMAP{$char} if $CLASSMAP{$char};
if ( $char == 40 ) { # $char eq '('
# Finalise any whitespace token...
$t->_finalize_token if $t->{token};
# Is this the beginning of a sub prototype?
# We are a sub prototype IF
# 1. The previous significant token is a bareword.
# 2. The one before that is the word 'sub'.
# 3. The one before that is a 'structure'
# Get the three previous significant tokens
my @tokens = $t->_previous_significant_tokens(3);
# A normal subroutine declaration
my $p1 = $tokens[1];
my $p2 = $tokens[2];
if (
$tokens[0]
and
$tokens[0]->isa('PPI::Token::Word')
and
$p1
and
$p1->isa('PPI::Token::Word')
and
$p1->content eq 'sub'
and (
not $p2
or
$p2->isa('PPI::Token::Structure')
or (
$p2->isa('PPI::Token::Whitespace')
and
$p2->content eq ''
)
or (
# Lexical subroutine
$p2->isa('PPI::Token::Word')
and
$p2->content =~ /^(?:my|our|state)$/
)
)
) {
# This is a sub prototype
return 'Prototype';
}
# A prototyped anonymous subroutine
my $p0 = $tokens[0];
if ( $p0 and $p0->isa('PPI::Token::Word') and $p0->content eq 'sub'
# Maybe it's invoking a method named 'sub'
and not ( $p1 and $p1->isa('PPI::Token::Operator') and $p1->content eq '->')
) {
return 'Prototype';
}
# This is a normal open bracket
return 'Structure';
} elsif ( $char == 60 ) { # $char eq '<'
# Finalise any whitespace token...
$t->_finalize_token if $t->{token};
# This is either "less than" or "readline quote-like"
# Do some context stuff to guess which.
my $prev = $t->_last_significant_token;
# The most common group of less-thans are used like
# $foo < $bar
# 1 < $bar
# $#foo < $bar
return 'Operator' if $prev and $prev->isa('PPI::Token::Symbol');
return 'Operator' if $prev and $prev->isa('PPI::Token::Magic');
return 'Operator' if $prev and $prev->isa('PPI::Token::Number');
return 'Operator' if $prev and $prev->isa('PPI::Token::ArrayIndex');
# If it is <<... it's a here-doc instead
my $next_char = substr( $t->{line}, $t->{line_cursor} + 1, 2 );
return 'Operator' if $next_char =~ /<[^>]/;
return 'Operator' if not $prev;
# The most common group of readlines are used like
# while ( <...> )
# while <>;
my $prec = $prev->content;
return 'QuoteLike::Readline'
if ( $prev->isa('PPI::Token::Structure') and $prec eq '(' )
or ( $prev->isa('PPI::Token::Structure') and $prec eq ';' )
or ( $prev->isa('PPI::Token::Word') and $prec eq 'while' )
or ( $prev->isa('PPI::Token::Operator') and $prec eq '=' )
or ( $prev->isa('PPI::Token::Operator') and $prec eq ',' );
if ( $prev->isa('PPI::Token::Structure') and $prec eq '}' ) {
# Could go either way... do a regex check
# $foo->{bar} < 2;
# grep { .. } <foo>;
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G<(?!\d)\w+>/gc ) {
# Almost definitely readline
return 'QuoteLike::Readline';
}
}
# Otherwise, we guess operator, which has been the default up
# until this more comprehensive section was created.
return 'Operator';
} elsif ( $char == 47 ) { # $char eq '/'
# Finalise any whitespace token...
$t->_finalize_token if $t->{token};
# This is either a "divided by" or a "start regex"
# Do some context stuff to guess ( ack ) which.
# Hopefully the guess will be good enough.
my $prev = $t->_last_significant_token;
# Or as the very first thing in a file
return 'Regexp::Match' if not $prev;
my $prec = $prev->content;
# Most times following an operator, we are a regex.
# This includes cases such as:
# , - As an argument in a list
# .. - The second condition in a flip flop
# =~ - A bound regex
# !~ - Ditto
return 'Regexp::Match' if $prev->isa('PPI::Token::Operator');
# After a symbol
return 'Operator' if $prev->isa('PPI::Token::Symbol');
if ( $prec eq ']' and $prev->isa('PPI::Token::Structure') ) {
return 'Operator';
}
# After another number
return 'Operator' if $prev->isa('PPI::Token::Number');
# After going into scope/brackets
if (
$prev->isa('PPI::Token::Structure')
and (
$prec eq '('
or
$prec eq '{'
or
$prec eq ';'
)
) {
return 'Regexp::Match';
}
# Functions and keywords
if (
$MATCHWORD{$prec}
and
$prev->isa('PPI::Token::Word')
) {
return 'Regexp::Match';
}
# What about the char after the slash? There's some things
# that would be highly illogical to see if it's an operator.
my $next_char = substr $t->{line}, $t->{line_cursor} + 1, 1;
if ( defined $next_char and length $next_char ) {
if ( $next_char =~ /(?:\^|\[|\\)/ ) {
return 'Regexp::Match';
}
}
# Otherwise... erm... assume operator?
# Add more tests here as potential cases come to light
return 'Operator';
} elsif ( $char == 120 ) { # $char eq 'x'
# Could be a word, the x= operator, the x operator
# followed by whitespace, or the x operator without any
# space between itself and its operand, e.g.: '$a x3',
# which is the same as '$a x 3'. _current_x_is_operator
# assumes we have a complete 'x' token, but we don't
# yet. We may need to split this x character apart from
# what follows it.
if ( $t->_current_x_is_operator ) {
pos $t->{line} = $t->{line_cursor} + 1;
return 'Operator' if $t->{line} =~ m/\G(?:
\d # x op with no whitespace e.g. 'x3'
|
(?!( # negative lookahead
=> # not on left of fat comma
|
\w # not a word like "xyzzy"
|
\s # not x op plus whitespace
))
)/gcx;
}
# Otherwise, commit like a normal bareword, including x
# operator followed by whitespace.
return PPI::Token::Word->__TOKENIZER__commit($t);
} elsif ( $char == 45 ) { # $char eq '-'
# Look for an obvious operator operand context
my $context = $t->_opcontext;
if ( $context eq 'operator' ) {
return 'Operator';
} else {
# More logic needed
return 'Unknown';
}
} elsif ( $char >= 128 ) { # Outside ASCII
return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $c =~ /\w/;
return 'Whitespace' if $c =~ /\s/;
}
# All the whitespaces are covered, so what to do
### For now, die
PPI::Exception->throw("Encountered unexpected character '$char'");
}
sub __TOKENIZER__on_line_end {
$_[1]->_finalize_token if $_[1]->{token};
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,373 @@
package PPI::Token::Word;
=pod
=head1 NAME
PPI::Token::Word - The generic "word" Token
=head1 INHERITANCE
PPI::Token::Word
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
A C<PPI::Token::Word> object is a PPI-specific representation of several
different types of word-like things, and is one of the most common Token
classes found in typical documents.
Specifically, it includes not only barewords, but also any other valid
Perl identifier including non-operator keywords and core functions, and
any include C<::> separators inside it, as long as it fits the
format of a class, function, etc.
=head1 METHODS
There are no methods available for C<PPI::Token::Word> beyond those
provided by its L<PPI::Token> and L<PPI::Element> parent
classes.
We expect to add additional methods to help further resolve a Word as
a function, method, etc over time. If you need such a thing right
now, look at L<Perl::Critic::Utils>.
=cut
use strict;
use PPI::Token ();
use PPI::Singletons qw' %OPERATOR %QUOTELIKE %KEYWORDS ';
our $VERSION = '1.270'; # VERSION
our @ISA = "PPI::Token";
=pod
=head2 literal
Returns the value of the Word as a string. This assumes (often
incorrectly) that the Word is a bareword and not a function, method,
keyword, etc. This differs from C<content> because C<Foo'Bar> expands
to C<Foo::Bar>.
=cut
sub literal {
my $self = shift;
my $word = $self->content;
# Expand Foo'Bar to Foo::Bar
$word =~ s/\'/::/g;
return $word;
}
=pod
=head2 method_call
Answers whether this is the name of a method in a method call. Returns true if
yes, false if no, and nothing if unknown.
=cut
sub method_call {
my $self = shift;
my $previous = $self->sprevious_sibling;
if (
$previous
and
$previous->isa('PPI::Token::Operator')
and
$previous->content eq '->'
) {
return 1;
}
my $snext = $self->snext_sibling;
return 0 unless $snext;
if (
$snext->isa('PPI::Structure::List')
or
$snext->isa('PPI::Token::Structure')
or
$snext->isa('PPI::Token::Operator')
and (
$snext->content eq ','
or
$snext->content eq '=>'
)
) {
return 0;
}
if (
$snext->isa('PPI::Token::Word')
and
$snext->content =~ m< \w :: \z >xms
) {
return 1;
}
return;
}
sub __TOKENIZER__on_char {
my $class = shift;
my $t = shift;
# Suck in till the end of the bareword
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G(\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
my $word = $1;
# Special Case: If we accidentally treat eq'foo' like
# the word "eq'foo", then just make 'eq' (or whatever
# else is in the %KEYWORDS hash.
if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
$word = $1;
}
$t->{token}->{content} .= $word;
$t->{line_cursor} += length $word;
}
# We might be a subroutine attribute.
if ( __current_token_is_attribute($t) ) {
$t->{class} = $t->{token}->set_class( 'Attribute' );
return $t->{class}->__TOKENIZER__commit( $t );
}
my $word = $t->{token}->{content};
if ( $KEYWORDS{$word} ) {
# Check for a Perl keyword that is forced to be a normal word instead
if ( $t->__current_token_is_forced_word ) {
$t->{class} = $t->{token}->set_class( 'Word' );
return $t->{class}->__TOKENIZER__on_char( $t );
}
# Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS
if ( $QUOTELIKE{$word} ) {
$t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
return $t->{class}->__TOKENIZER__on_char( $t );
}
# Or one of the word operators. %OPERATOR must be subset of %KEYWORDS
if ( $OPERATOR{$word} ) {
$t->{class} = $t->{token}->set_class( 'Operator' );
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
}
# Unless this is a simple identifier, at this point
# it has to be a normal bareword
if ( $word =~ /\:/ ) {
return $t->_finalize_token->__TOKENIZER__on_char( $t );
}
# If the NEXT character in the line is a colon, this
# is a label.
my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
if ( $char eq ':' ) {
$t->{token}->{content} .= ':';
$t->{line_cursor}++;
$t->{class} = $t->{token}->set_class( 'Label' );
# If not a label, '_' on its own is the magic filehandle
} elsif ( $word eq '_' ) {
$t->{class} = $t->{token}->set_class( 'Magic' );
}
# Finalise and process the character again
$t->_finalize_token->__TOKENIZER__on_char( $t );
}
# We are committed to being a bareword.
# Or so we would like to believe.
sub __TOKENIZER__commit {
my ($class, $t) = @_;
# Our current position is the first character of the bareword.
# Capture the bareword.
pos $t->{line} = $t->{line_cursor};
unless ( $t->{line} =~ m/\G((?!\d)\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
# Programmer error
die sprintf "Fatal error... regex failed to match in '%s' when expected", substr $t->{line}, $t->{line_cursor};
}
# Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
# then unwind it and just make it 'eq' (or the other stringy comparitors)
my $word = $1;
if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
$word = $1;
}
# Advance the position one after the end of the bareword
$t->{line_cursor} += length $word;
# We might be a subroutine attribute.
if ( __current_token_is_attribute($t) ) {
$t->_new_token( 'Attribute', $word );
return ($t->{line_cursor} >= $t->{line_length}) ? 0
: $t->{class}->__TOKENIZER__on_char($t);
}
# Check for the end of the file
if ( $word eq '__END__' ) {
# Create the token for the __END__ itself
$t->_new_token( 'Separator', $1 );
$t->_finalize_token;
# Move into the End zone (heh)
$t->{zone} = 'PPI::Token::End';
# Add the rest of the line as a comment, and a whitespace newline
# Anything after the __END__ on the line is "ignored". So we must
# also ignore it, by turning it into a comment.
my $end_rest = substr( $t->{line}, $t->{line_cursor} );
$t->{line_cursor} = length $t->{line};
if ( $end_rest =~ /\n$/ ) {
chomp $end_rest;
$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
$t->_new_token( 'Whitespace', "\n" );
} else {
$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
}
$t->_finalize_token;
return 0;
}
# Check for the data section
if ( $word eq '__DATA__' ) {
# Create the token for the __DATA__ itself
$t->_new_token( 'Separator', "$1" );
$t->_finalize_token;
# Move into the Data zone
$t->{zone} = 'PPI::Token::Data';
# Add the rest of the line as the Data token
my $data_rest = substr( $t->{line}, $t->{line_cursor} );
$t->{line_cursor} = length $t->{line};
if ( $data_rest =~ /\n$/ ) {
chomp $data_rest;
$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
$t->_new_token( 'Whitespace', "\n" );
} else {
$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
}
$t->_finalize_token;
return 0;
}
my $token_class;
if ( $word =~ /\:/ ) {
# Since it's not a simple identifier...
$token_class = 'Word';
} elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) {
$token_class = 'Word';
} elsif ( $QUOTELIKE{$word} ) {
# Special Case: A Quote-like operator
$t->_new_token( $QUOTELIKE{$word}, $word );
return ($t->{line_cursor} >= $t->{line_length}) ? 0
: $t->{class}->__TOKENIZER__on_char( $t );
} elsif ( $OPERATOR{$word} && ($word ne 'x' || $t->_current_x_is_operator) ) {
# Word operator
$token_class = 'Operator';
} else {
# Get tokens early to be sure to not disturb state set up by pos and m//gc.
my @tokens = $t->_previous_significant_tokens(1);
# If the next character is a ':' then it's a label...
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) {
if ( $tokens[0] and $tokens[0]->{content} eq 'sub' ) {
# ... UNLESS it's after 'sub' in which
# case it is a sub name and an attribute
# operator.
# We COULD have checked this at the top
# level of checks, but this would impose
# an additional performance per-word
# penalty, and every other case where the
# attribute operator doesn't directly
# touch the object name already works.
$token_class = 'Word';
} else {
$word .= $1;
$t->{line_cursor} += length($1);
$token_class = 'Label';
}
} elsif ( $word eq '_' ) {
$token_class = 'Magic';
} else {
$token_class = 'Word';
}
}
# Create the new token and finalise
$t->_new_token( $token_class, $word );
if ( $t->{line_cursor} >= $t->{line_length} ) {
# End of the line
$t->_finalize_token;
return 0;
}
$t->_finalize_token->__TOKENIZER__on_char($t);
}
# Is the current Word really a subroutine attribute?
sub __current_token_is_attribute {
my ( $t ) = @_;
my @tokens = $t->_previous_significant_tokens(1);
return (
$tokens[0]
and (
# hint from tokenizer
$tokens[0]->{_attribute}
# nothing between attribute and us except whitespace
or $tokens[0]->isa('PPI::Token::Attribute')
)
);
}
1;
=pod
=head1 TO DO
- Add C<function>, C<method> etc detector methods
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,240 @@
package PPI::Token::_QuoteEngine;
=pod
=head1 NAME
PPI::Token::_QuoteEngine - The PPI Quote Engine
=head1 DESCRIPTION
The C<PPI::Token::_QuoteEngine> package is designed hold functionality
for processing quotes and quote like operators, including regexes.
These have special requirements in parsing.
The C<PPI::Token::_QuoteEngine> package itself provides various parsing
methods, which the L<PPI::Token::Quote>, L<PPI::Token::QuoteLike> and
L<PPI::Token::Regexp> can inherit from. In this sense, it serves
as a base class.
=head2 Using this class
I<(Refers only to internal uses. This class does not provide a
public interface)>
To use these, you should initialize them as normal C<'$Class-E<gt>new'>,
and then call the 'fill' method, which will cause the specialised
parser to scan forwards and parse the quote to its end point.
If -E<gt>fill returns true, finalise the token.
=cut
use strict;
use Carp ();
our $VERSION = '1.270'; # VERSION
# Hook for the __TOKENIZER__on_char token call
sub __TOKENIZER__on_char {
my $class = shift;
my $t = $_[0]->{token} ? shift : return undef;
# Call the fill method to process the quote
my $rv = $t->{token}->_fill( $t );
return undef unless defined $rv;
## Doesn't support "end of file" indicator
# Finalize the token and return 0 to tell the tokenizer
# to go to the next character.
$t->_finalize_token;
0;
}
#####################################################################
# Optimised character processors, used for quotes
# and quote like stuff, and accessible to the child classes
# An outright scan, raw and fast.
# Searches for a particular character, not escaped, loading in new
# lines as needed.
# When called, we start at the current position.
# When leaving, the position should be set to the position
# of the character, NOT the one after it.
sub _scan_for_unescaped_character {
my $class = shift;
my $t = shift;
my $char = (length $_[0] == 1) ? quotemeta shift : return undef;
# Create the search regex.
# Same as above but with a negative look-behind assertion.
my $search = qr/(.*?(?<!\\)(?:\\\\)*$char)/;
my $string = '';
while ( exists $t->{line} ) {
# Get the search area for the current line
pos $t->{line} = $t->{line_cursor};
# Can we find a match on this line
if ( $t->{line} =~ m/\G$search/gc ) {
# Found the character on this line
$t->{line_cursor} += length($1) - 1;
return $string . $1;
}
# Load in the next line
$string .= substr $t->{line}, $t->{line_cursor};
my $rv = $t->_fill_line('inscan');
if ( $rv ) {
# Push to first character
$t->{line_cursor} = 0;
} elsif ( defined $rv ) {
# We hit the End of File
return \$string;
} else {
# Unexpected error
return undef;
}
}
# We shouldn't be able to get here
return undef;
}
# Scan for a close braced, and take into account both escaping,
# and open close bracket pairs in the string. When complete, the
# method leaves the line cursor on the LAST character found.
sub _scan_for_brace_character {
my $class = shift;
my $t = shift;
my $close_brace = $_[0] =~ /^(?:\>|\)|\}|\])$/ ? shift : Carp::confess(''); # return undef;
my $open_brace = $close_brace;
$open_brace =~ tr/\>\)\}\]/\<\(\{\[/;
# Create the search string
$close_brace = quotemeta $close_brace;
$open_brace = quotemeta $open_brace;
my $search = qr/\G(.*?(?<!\\)(?:\\\\)*(?:$open_brace|$close_brace))/;
# Loop as long as we can get new lines
my $string = '';
my $depth = 1;
while ( exists $t->{line} ) {
# Get the search area
pos $t->{line} = $t->{line_cursor};
# Look for a match
unless ( $t->{line} =~ /$search/gc ) {
# Load in the next line
$string .= substr( $t->{line}, $t->{line_cursor} );
my $rv = $t->_fill_line('inscan');
if ( $rv ) {
# Push to first character
$t->{line_cursor} = 0;
next;
}
if ( defined $rv ) {
# We hit the End of File
return \$string;
}
# Unexpected error
return undef;
}
# Add to the string
$string .= $1;
$t->{line_cursor} += length $1;
# Alter the depth and continue if we aren't at the end
$depth += ($1 =~ /$open_brace$/) ? 1 : -1 and next;
# Rewind the cursor by one character ( cludgy hack )
$t->{line_cursor} -= 1;
return $string;
}
# Returning the string as a reference indicates EOF
\$string;
}
# Find all spaces and comments, up to, but not including
# the first non-whitespace character.
#
# Although it doesn't return it, it leaves the cursor
# on the character following the gap
sub _scan_quote_like_operator_gap {
my $t = $_[1];
my $string = '';
while ( exists $t->{line} ) {
# Get the search area for the current line
pos $t->{line} = $t->{line_cursor};
# Since this regex can match zero characters, it should always match
$t->{line} =~ /\G(\s*(?:\#.*)?)/gc or return undef;
# Add the chars found to the string
$string .= $1;
# Did we match the entire line?
unless ( $t->{line_cursor} + length $1 == length $t->{line} ) {
# Partial line match, which means we are at
# the end of the gap. Fix the cursor and return
# the string.
$t->{line_cursor} += length $1;
return $string;
}
# Load in the next line.
# If we reach the EOF, $t->{line} gets deleted,
# which is caught by the while.
my $rv = $t->_fill_line('inscan');
if ( $rv ) {
# Set the cursor to the first character
$t->{line_cursor} = 0;
} elsif ( defined $rv ) {
# Returning the string as a reference indicates EOF
return \$string;
} else {
return undef;
}
}
# Shouldn't be able to get here
return undef;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,448 @@
package PPI::Token::_QuoteEngine::Full;
# Full quote engine
use strict;
use Clone ();
use Carp ();
use PPI::Token::_QuoteEngine ();
our $VERSION = '1.270'; # VERSION
our @ISA = 'PPI::Token::_QuoteEngine';
# Prototypes for the different braced sections
my %SECTIONS = (
'(' => { type => '()', _close => ')' },
'<' => { type => '<>', _close => '>' },
'[' => { type => '[]', _close => ']' },
'{' => { type => '{}', _close => '}' },
);
# For each quote type, the extra fields that should be set.
# This should give us faster initialization.
my %QUOTES = (
'q' => { operator => 'q', braced => undef, separator => undef, _sections => 1 },
'qq' => { operator => 'qq', braced => undef, separator => undef, _sections => 1 },
'qx' => { operator => 'qx', braced => undef, separator => undef, _sections => 1 },
'qw' => { operator => 'qw', braced => undef, separator => undef, _sections => 1 },
'qr' => { operator => 'qr', braced => undef, separator => undef, _sections => 1, modifiers => 1 },
'm' => { operator => 'm', braced => undef, separator => undef, _sections => 1, modifiers => 1 },
's' => { operator => 's', braced => undef, separator => undef, _sections => 2, modifiers => 1 },
'tr' => { operator => 'tr', braced => undef, separator => undef, _sections => 2, modifiers => 1 },
# Y is the little-used variant of tr
'y' => { operator => 'y', braced => undef, separator => undef, _sections => 2, modifiers => 1 },
'/' => { operator => undef, braced => 0, separator => '/', _sections => 1, modifiers => 1 },
# Angle brackets quotes mean "readline(*FILEHANDLE)"
'<' => { operator => undef, braced => 1, separator => undef, _sections => 1, },
# The final ( and kind of depreciated ) "first match only" one is not
# used yet, since I'm not sure on the context differences between
# this and the trinary operator, but it's here for completeness.
'?' => { operator => undef, braced => 0, separator => '?', _sections => 1, modifiers => 1 },
);
sub new {
my $class = shift;
my $init = defined $_[0]
? shift
: Carp::croak("::Full->new called without init string");
# Create the token
### This manual SUPER'ing ONLY works because none of
### Token::Quote, Token::QuoteLike and Token::Regexp
### implement a new function of their own.
my $self = PPI::Token::new( $class, $init ) or return undef;
# Do we have a prototype for the initializer? If so, add the extra fields
my $options = $QUOTES{$init} or return $self->_error(
"Unknown quote type '$init'"
);
foreach ( keys %$options ) {
$self->{$_} = $options->{$_};
}
# Set up the modifiers hash if needed
$self->{modifiers} = {} if $self->{modifiers};
# Handle the special < base
if ( $init eq '<' ) {
$self->{sections}->[0] = Clone::clone( $SECTIONS{'<'} );
}
$self;
}
sub _fill {
my $class = shift;
my $t = shift;
my $self = $t->{token}
or Carp::croak("::Full->_fill called without current token");
# Load in the operator stuff if needed
if ( $self->{operator} ) {
# In an operator based quote-like, handle the gap between the
# operator and the opening separator.
if ( substr( $t->{line}, $t->{line_cursor}, 1 ) =~ /\s/ ) {
# Go past the gap
my $gap = $self->_scan_quote_like_operator_gap( $t );
return undef unless defined $gap;
if ( ref $gap ) {
# End of file
$self->{content} .= $$gap;
return 0;
}
$self->{content} .= $gap;
}
# The character we are now on is the separator. Capture,
# and advance into the first section.
my $sep = substr( $t->{line}, $t->{line_cursor}++, 1 );
$self->{content} .= $sep;
# Determine if these are normal or braced type sections
if ( my $section = $SECTIONS{$sep} ) {
$self->{braced} = 1;
$self->{sections}->[0] = Clone::clone($section);
} else {
$self->{braced} = 0;
$self->{separator} = $sep;
}
}
# Parse different based on whether we are normal or braced
my $rv = $self->{braced}
? $self->_fill_braced($t)
: $self->_fill_normal($t);
return $rv if !$rv;
# Return now unless it has modifiers ( i.e. s/foo//eieio )
return 1 unless $self->{modifiers};
# Check for modifiers
my $char;
my $len = 0;
while ( ($char = substr( $t->{line}, $t->{line_cursor} + 1, 1 )) =~ /[^\W\d_]/ ) {
$len++;
$self->{content} .= $char;
$self->{modifiers}->{lc $char} = 1;
$t->{line_cursor}++;
}
}
# Handle the content parsing path for normally separated
sub _fill_normal {
my $self = shift;
my $t = shift;
# Get the content up to the next separator
my $string = $self->_scan_for_unescaped_character( $t, $self->{separator} );
return undef unless defined $string;
if ( ref $string ) {
# End of file
if ( length($$string) > 1 ) {
# Complete the properties for the first section
my $str = $$string;
chop $str;
$self->{sections}->[0] = {
position => length($self->{content}),
size => length($$string) - 1,
type => "$self->{separator}$self->{separator}",
};
$self->{_sections} = 1;
} else {
# No sections at all
$self->{sections} = [ ];
$self->{_sections} = 0;
}
$self->{content} .= $$string;
return 0;
}
# Complete the properties of the first section
$self->{sections}->[0] = {
position => length $self->{content},
size => length($string) - 1,
type => "$self->{separator}$self->{separator}",
};
$self->{content} .= $string;
# We are done if there is only one section
return 1 if $self->{_sections} == 1;
# There are two sections.
# Advance into the next section
$t->{line_cursor}++;
# Get the content up to the end separator
$string = $self->_scan_for_unescaped_character( $t, $self->{separator} );
return undef unless defined $string;
if ( ref $string ) {
# End of file
if ( length($$string) > 1 ) {
# Complete the properties for the second section
my $str = $$string;
chop $str;
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($$string) - 1,
type => "$self->{separator}$self->{separator}",
};
} else {
# No sections at all
$self->{_sections} = 1;
}
$self->{content} .= $$string;
return 0;
}
# Complete the properties of the second section
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($string) - 1
};
$self->{content} .= $string;
1;
}
# Handle content parsing for matching brace separated
sub _fill_braced {
my $self = shift;
my $t = shift;
# Get the content up to the close character
my $section = $self->{sections}->[0];
my $brace_str = $self->_scan_for_brace_character( $t, $section->{_close} );
return undef unless defined $brace_str;
if ( ref $brace_str ) {
# End of file
if ( length($$brace_str) > 1 ) {
# Complete the properties for the first section
my $str = $$brace_str;
chop $str;
$self->{sections}->[0] = {
position => length($self->{content}),
size => length($$brace_str) - 1,
type => $section->{type},
};
$self->{_sections} = 1;
} else {
# No sections at all
$self->{sections} = [ ];
$self->{_sections} = 0;
}
$self->{content} .= $$brace_str;
return 0;
}
# Complete the properties of the first section
$section->{position} = length $self->{content};
$section->{size} = length($brace_str) - 1;
$self->{content} .= $brace_str;
delete $section->{_close};
# We are done if there is only one section
return 1 if $self->{_sections} == 1;
# There are two sections.
# Is there a gap between the sections.
my $char = substr( $t->{line}, ++$t->{line_cursor}, 1 );
if ( $char =~ /\s/ ) {
# Go past the gap
my $gap_str = $self->_scan_quote_like_operator_gap( $t );
return undef unless defined $gap_str;
if ( ref $gap_str ) {
# End of file
$self->{content} .= $$gap_str;
return 0;
}
$self->{content} .= $gap_str;
$char = substr( $t->{line}, $t->{line_cursor}, 1 );
}
$section = $SECTIONS{$char};
if ( $section ) {
# It's a brace
# Initialize the second section
$self->{content} .= $char;
$section = { %$section };
# Advance into the second section
$t->{line_cursor}++;
# Get the content up to the close character
$brace_str = $self->_scan_for_brace_character( $t, $section->{_close} );
return undef unless defined $brace_str;
if ( ref $brace_str ) {
# End of file
if ( length($$brace_str) > 1 ) {
# Complete the properties for the second section
my $str = $$brace_str;
chop $str;
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($$brace_str) - 1,
type => $section->{type},
};
$self->{_sections} = 2;
} else {
# No sections at all
$self->{_sections} = 1;
}
$self->{content} .= $$brace_str;
return 0;
} else {
# Complete the properties for the second section
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($brace_str) - 1,
type => $section->{type},
};
$self->{content} .= $brace_str;
}
} elsif ( $char =~ m/ \A [^\w\s] \z /smx ) {
# It is some other delimiter (weird, but possible)
# Add the delimiter to the content.
$self->{content} .= $char;
# Advance into the next section
$t->{line_cursor}++;
# Get the content up to the end separator
my $string = $self->_scan_for_unescaped_character( $t, $char );
return undef unless defined $string;
if ( ref $string ) {
# End of file
if ( length($$string) > 1 ) {
# Complete the properties for the second section
my $str = $$string;
chop $str;
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($$string) - 1,
type => "$char$char",
};
} else {
# Only the one section
$self->{_sections} = 1;
}
$self->{content} .= $$string;
return 0;
}
# Complete the properties of the second section
$self->{sections}->[1] = {
position => length($self->{content}),
size => length($string) - 1,
type => "$char$char",
};
$self->{content} .= $string;
} else {
# Error, it has to be a delimiter of some sort.
# Although this will result in a REALLY illegal regexp,
# we allow it anyway.
# Create a null second section
$self->{sections}->[1] = {
position => length($self->{content}),
size => 0,
type => '',
};
# Attach an error to the token and move on
$self->{_error} = "No second section of regexp, or does not start with a balanced character";
# Roll back the cursor one char and return signalling end of regexp
$t->{line_cursor}--;
return 0;
}
1;
}
#####################################################################
# Additional methods to find out about the quote
# In a scalar context, get the number of sections
# In an array context, get the section information
sub _sections {
wantarray ? @{$_[0]->{sections}} : scalar @{$_[0]->{sections}}
}
# Get a section's content
sub _section_content {
my $self = shift;
my $i = shift;
$self->{sections} or return;
my $section = $self->{sections}->[$i] or return;
return substr( $self->content, $section->{position}, $section->{size} );
}
# Get the modifiers if any.
# In list context, return the modifier hash.
# In scalar context, clone the hash and return a reference to it.
# If there are no modifiers, simply return.
sub _modifiers {
my $self = shift;
$self->{modifiers} or return;
wantarray and return %{ $self->{modifiers} };
return +{ %{ $self->{modifiers} } };
}
# Get the delimiters, or at least give it a good try to get them.
sub _delimiters {
my $self = shift;
$self->{sections} or return;
my @delims;
foreach my $sect ( @{ $self->{sections} } ) {
if ( exists $sect->{type} ) {
push @delims, $sect->{type};
} else {
my $content = $self->content;
push @delims,
substr( $content, $sect->{position} - 1, 1 ) .
substr( $content, $sect->{position} + $sect->{size}, 1 );
}
}
return @delims;
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut

View File

@@ -0,0 +1,67 @@
package PPI::Token::_QuoteEngine::Simple;
# Simple quote engine
use strict;
use PPI::Token::_QuoteEngine ();
our $VERSION = '1.270'; # VERSION
our @ISA = 'PPI::Token::_QuoteEngine';
sub new {
my $class = shift;
my $separator = shift or return undef;
# Create a new token containing the separator
### This manual SUPER'ing ONLY works because none of
### Token::Quote, Token::QuoteLike and Token::Regexp
### implement a new function of their own.
my $self = PPI::Token::new( $class, $separator ) or return undef;
$self->{separator} = $separator;
$self;
}
sub _fill {
my $class = shift;
my $t = shift;
my $self = $t->{token} or return undef;
# Scan for the end separator
my $string = $self->_scan_for_unescaped_character( $t, $self->{separator} );
return undef unless defined $string;
if ( ref $string ) {
# End of file
$self->{content} .= $$string;
return 0;
} else {
# End of string
$self->{content} .= $string;
return $self;
}
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut