Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/vendor/lib/PPI/Token/Word.pm
+++ b/database/perl/vendor/lib/PPI/Token/Word.pm
@@ -0,0 +1,373 @@
+package PPI::Token::Word;
+
+=pod
+
+=head1 NAME
+
+PPI::Token::Word - The generic "word" Token
+
+=head1 INHERITANCE
+
+  PPI::Token::Word
+  isa PPI::Token
+      isa PPI::Element
+
+=head1 DESCRIPTION
+
+A C<PPI::Token::Word> object is a PPI-specific representation of several
+different types of word-like things, and is one of the most common Token
+classes found in typical documents.
+
+Specifically, it includes not only barewords, but also any other valid
+Perl identifier including non-operator keywords and core functions, and
+any include C<::> separators inside it, as long as it fits the
+format of a class, function, etc.
+
+=head1 METHODS
+
+There are no methods available for C<PPI::Token::Word> beyond those
+provided by its L<PPI::Token> and L<PPI::Element> parent
+classes.
+
+We expect to add additional methods to help further resolve a Word as
+a function, method, etc over time.  If you need such a thing right
+now, look at L<Perl::Critic::Utils>.
+
+=cut
+
+use strict;
+use PPI::Token ();
+use PPI::Singletons qw' %OPERATOR %QUOTELIKE %KEYWORDS ';
+
+our $VERSION = '1.270'; # VERSION
+
+our @ISA = "PPI::Token";
+
+=pod
+
+=head2 literal
+
+Returns the value of the Word as a string.  This assumes (often
+incorrectly) that the Word is a bareword and not a function, method,
+keyword, etc.  This differs from C<content> because C<Foo'Bar> expands
+to C<Foo::Bar>.
+
+=cut
+
+sub literal {
+	my $self = shift;
+	my $word = $self->content;
+
+	# Expand Foo'Bar to Foo::Bar
+	$word =~ s/\'/::/g;
+
+	return $word;
+}
+
+=pod
+
+=head2 method_call
+
+Answers whether this is the name of a method in a method call. Returns true if
+yes, false if no, and nothing if unknown.
+
+=cut
+
+sub method_call {
+	my $self = shift;
+
+	my $previous = $self->sprevious_sibling;
+	if (
+		$previous
+		and
+		$previous->isa('PPI::Token::Operator')
+		and
+		$previous->content eq '->'
+	) {
+		return 1;
+	}
+
+	my $snext = $self->snext_sibling;
+	return 0 unless $snext;
+
+	if (
+		$snext->isa('PPI::Structure::List')
+		or
+		$snext->isa('PPI::Token::Structure')
+		or
+		$snext->isa('PPI::Token::Operator')
+		and (
+			$snext->content eq ','
+			or
+			$snext->content eq '=>'
+		)
+	) {
+		return 0;
+	}
+
+	if (
+		$snext->isa('PPI::Token::Word')
+		and
+		$snext->content =~ m< \w :: \z >xms
+	) {
+		return 1;
+	}
+
+	return;
+}
+
+
+sub __TOKENIZER__on_char {
+	my $class = shift;
+	my $t     = shift;
+
+	# Suck in till the end of the bareword
+	pos $t->{line} = $t->{line_cursor};
+	if ( $t->{line} =~ m/\G(\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
+		my $word = $1;
+		# Special Case: If we accidentally treat eq'foo' like
+		# the word "eq'foo", then just make 'eq' (or whatever
+		# else is in the %KEYWORDS hash.
+		if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
+		    $word = $1;
+		}
+		$t->{token}->{content} .= $word;
+		$t->{line_cursor} += length $word;
+
+	}
+
+	# We might be a subroutine attribute.
+	if ( __current_token_is_attribute($t) ) {
+		$t->{class} = $t->{token}->set_class( 'Attribute' );
+		return $t->{class}->__TOKENIZER__commit( $t );
+	}
+
+	my $word = $t->{token}->{content};
+	if ( $KEYWORDS{$word} ) {
+		# Check for a Perl keyword that is forced to be a normal word instead
+		if ( $t->__current_token_is_forced_word ) {
+			$t->{class} = $t->{token}->set_class( 'Word' );
+			return $t->{class}->__TOKENIZER__on_char( $t );
+		}
+
+		# Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS
+		if ( $QUOTELIKE{$word} ) {
+			$t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
+			return $t->{class}->__TOKENIZER__on_char( $t );
+		}
+
+		# Or one of the word operators. %OPERATOR must be subset of %KEYWORDS
+		if ( $OPERATOR{$word} ) {
+			$t->{class} = $t->{token}->set_class( 'Operator' );
+			return $t->_finalize_token->__TOKENIZER__on_char( $t );
+		}
+	}
+
+	# Unless this is a simple identifier, at this point
+	# it has to be a normal bareword
+	if ( $word =~ /\:/ ) {
+		return $t->_finalize_token->__TOKENIZER__on_char( $t );
+	}
+
+	# If the NEXT character in the line is a colon, this
+	# is a label.
+	my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
+	if ( $char eq ':' ) {
+		$t->{token}->{content} .= ':';
+		$t->{line_cursor}++;
+		$t->{class} = $t->{token}->set_class( 'Label' );
+
+	# If not a label, '_' on its own is the magic filehandle
+	} elsif ( $word eq '_' ) {
+		$t->{class} = $t->{token}->set_class( 'Magic' );
+
+	}
+
+	# Finalise and process the character again
+	$t->_finalize_token->__TOKENIZER__on_char( $t );
+}
+
+
+
+# We are committed to being a bareword.
+# Or so we would like to believe.
+sub __TOKENIZER__commit {
+	my ($class, $t) = @_;
+
+	# Our current position is the first character of the bareword.
+	# Capture the bareword.
+	pos $t->{line} = $t->{line_cursor};
+	unless ( $t->{line} =~ m/\G((?!\d)\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
+		# Programmer error
+		die sprintf "Fatal error... regex failed to match in '%s' when expected", substr $t->{line}, $t->{line_cursor};
+	}
+
+	# Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
+	# then unwind it and just make it 'eq' (or the other stringy comparitors)
+	my $word = $1;
+	if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
+	    $word = $1;
+	}
+
+	# Advance the position one after the end of the bareword
+	$t->{line_cursor} += length $word;
+
+	# We might be a subroutine attribute.
+	if ( __current_token_is_attribute($t) ) {
+		$t->_new_token( 'Attribute', $word );
+		return ($t->{line_cursor} >= $t->{line_length}) ? 0
+			: $t->{class}->__TOKENIZER__on_char($t);
+	}
+
+	# Check for the end of the file
+	if ( $word eq '__END__' ) {
+		# Create the token for the __END__ itself
+		$t->_new_token( 'Separator', $1 );
+		$t->_finalize_token;
+
+		# Move into the End zone (heh)
+		$t->{zone} = 'PPI::Token::End';
+
+		# Add the rest of the line as a comment, and a whitespace newline
+		# Anything after the __END__ on the line is "ignored". So we must
+		# also ignore it, by turning it into a comment.
+		my $end_rest = substr( $t->{line}, $t->{line_cursor} );
+		$t->{line_cursor} = length $t->{line};
+		if ( $end_rest =~ /\n$/ ) {
+			chomp $end_rest;
+			$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
+			$t->_new_token( 'Whitespace', "\n" );
+		} else {
+			$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
+		}
+		$t->_finalize_token;
+
+		return 0;
+	}
+
+	# Check for the data section
+	if ( $word eq '__DATA__' ) {
+		# Create the token for the __DATA__ itself
+		$t->_new_token( 'Separator', "$1" );
+		$t->_finalize_token;
+
+		# Move into the Data zone
+		$t->{zone} = 'PPI::Token::Data';
+
+		# Add the rest of the line as the Data token
+		my $data_rest = substr( $t->{line}, $t->{line_cursor} );
+		$t->{line_cursor} = length $t->{line};
+		if ( $data_rest =~ /\n$/ ) {
+			chomp $data_rest;
+			$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
+			$t->_new_token( 'Whitespace', "\n" );
+		} else {
+			$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
+		}
+		$t->_finalize_token;
+
+		return 0;
+	}
+
+	my $token_class;
+	if ( $word =~ /\:/ ) {
+		# Since it's not a simple identifier...
+		$token_class = 'Word';
+
+	} elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) {
+		$token_class = 'Word';
+
+	} elsif ( $QUOTELIKE{$word} ) {
+		# Special Case: A Quote-like operator
+		$t->_new_token( $QUOTELIKE{$word}, $word );
+		return ($t->{line_cursor} >= $t->{line_length}) ? 0
+			: $t->{class}->__TOKENIZER__on_char( $t );
+
+	} elsif ( $OPERATOR{$word} && ($word ne 'x' || $t->_current_x_is_operator) ) {
+		# Word operator
+		$token_class = 'Operator';
+
+	} else {
+		# Get tokens early to be sure to not disturb state set up by pos and m//gc.
+		my @tokens = $t->_previous_significant_tokens(1);
+
+		# If the next character is a ':' then it's a label...
+		pos $t->{line} = $t->{line_cursor};
+		if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) {
+			if ( $tokens[0] and $tokens[0]->{content} eq 'sub' ) {
+				# ... UNLESS it's after 'sub' in which
+				# case it is a sub name and an attribute
+				# operator.
+				# We COULD have checked this at the top
+				# level of checks, but this would impose
+				# an additional performance per-word
+				# penalty, and every other case where the
+				# attribute operator doesn't directly
+				# touch the object name already works.
+				$token_class = 'Word';
+			} else {
+				$word .= $1;
+				$t->{line_cursor} += length($1);
+				$token_class = 'Label';
+			}
+		} elsif ( $word eq '_' ) {
+			$token_class = 'Magic';
+		} else {
+			$token_class = 'Word';
+		}
+	}
+
+	# Create the new token and finalise
+	$t->_new_token( $token_class, $word );
+	if ( $t->{line_cursor} >= $t->{line_length} ) {
+		# End of the line
+		$t->_finalize_token;
+		return 0;
+	}
+	$t->_finalize_token->__TOKENIZER__on_char($t);
+}
+
+
+
+# Is the current Word really a subroutine attribute?
+sub __current_token_is_attribute {
+	my ( $t ) = @_;
+	my @tokens = $t->_previous_significant_tokens(1);
+	return (
+		$tokens[0]
+		and (
+			# hint from tokenizer
+			$tokens[0]->{_attribute}
+			# nothing between attribute and us except whitespace
+			or $tokens[0]->isa('PPI::Token::Attribute')
+		)
+	);
+}
+
+1;
+
+=pod
+
+=head1 TO DO
+
+- Add C<function>, C<method> etc detector methods
+
+=head1 SUPPORT
+
+See the L<support section|PPI/SUPPORT> in the main module.
+
+=head1 AUTHOR
+
+Adam Kennedy E<lt>adamk@cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2001 - 2011 Adam Kennedy.
+
+This program is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+The full text of the license can be found in the
+LICENSE file included with this module.
+
+=cut