Initial Commit
This commit is contained in:
3170
database/perl/vendor/lib/XML/SAX/Base.pm
vendored
Normal file
3170
database/perl/vendor/lib/XML/SAX/Base.pm
vendored
Normal file
File diff suppressed because it is too large
Load Diff
847
database/perl/vendor/lib/XML/SAX/BuildSAXBase.pl
vendored
Normal file
847
database/perl/vendor/lib/XML/SAX/BuildSAXBase.pl
vendored
Normal file
@@ -0,0 +1,847 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
# This file is used to generate lib/XML/SAX/Base.pm. There is a pre-generated
|
||||
# Base.pm file included in the distribution so you don't need to run this
|
||||
# script unless you are attempting to modify the code.
|
||||
#
|
||||
# The code in this file was adapted from the Makefile.PL when XML::SAX::Base
|
||||
# was split back out into its own distribution.
|
||||
#
|
||||
# You can manually run this file:
|
||||
#
|
||||
# perl ./BuildSAXBase.pl
|
||||
#
|
||||
# or better yet it will be invoked by automatically Dist::Zilla when building
|
||||
# a release from the git repository.
|
||||
#
|
||||
# dzil build
|
||||
#
|
||||
|
||||
package SAX::Base::Builder;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use File::Spec;
|
||||
|
||||
write_xml_sax_base() unless caller();
|
||||
|
||||
sub build_xml_sax_base {
|
||||
my $code = <<'EOHEADER';
|
||||
package XML::SAX::Base;
|
||||
|
||||
# version 0.10 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 0.13 - Robin Berjon <robin@knowscape.com>
|
||||
# version 0.15 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 0.17 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 0.19 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 0.21 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 0.22 - Robin Berjon <robin@knowscape.com>
|
||||
# version 0.23 - Matt Sergeant <matt@sergeant.org>
|
||||
# version 0.24 - Robin Berjon <robin@knowscape.com>
|
||||
# version 0.25 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 1.00 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 1.01 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 1.02 - Robin Berjon <robin@knowscape.com>
|
||||
# version 1.03 - Matt Sergeant <matt@sergeant.org>
|
||||
# version 1.04 - Kip Hampton <khampton@totalcinema.com>
|
||||
# version 1.05 - Grant McLean <grantm@cpan.org>
|
||||
# version 1.06 - Grant McLean <grantm@cpan.org>
|
||||
# version 1.07 - Grant McLean <grantm@cpan.org>
|
||||
# version 1.08 - Grant McLean <grantm@cpan.org>
|
||||
|
||||
#-----------------------------------------------------#
|
||||
# STOP!!!!!
|
||||
#
|
||||
# This file is generated by the 'BuildSAXBase.pl' file
|
||||
# that ships with the XML::SAX::Base distribution.
|
||||
# If you need to make changes, patch that file NOT
|
||||
# XML/SAX/Base.pm Better yet, fork the git repository
|
||||
# commit your changes and send a pull request:
|
||||
# https://github.com/grantm/XML-SAX-Base
|
||||
#-----------------------------------------------------#
|
||||
|
||||
use strict;
|
||||
|
||||
use XML::SAX::Exception qw();
|
||||
|
||||
EOHEADER
|
||||
|
||||
my %EVENT_SPEC = (
|
||||
start_document => [qw(ContentHandler DocumentHandler Handler)],
|
||||
end_document => [qw(ContentHandler DocumentHandler Handler)],
|
||||
start_element => [qw(ContentHandler DocumentHandler Handler)],
|
||||
end_element => [qw(ContentHandler DocumentHandler Handler)],
|
||||
characters => [qw(ContentHandler DocumentHandler Handler)],
|
||||
processing_instruction => [qw(ContentHandler DocumentHandler Handler)],
|
||||
ignorable_whitespace => [qw(ContentHandler DocumentHandler Handler)],
|
||||
set_document_locator => [qw(ContentHandler DocumentHandler Handler)],
|
||||
start_prefix_mapping => [qw(ContentHandler Handler)],
|
||||
end_prefix_mapping => [qw(ContentHandler Handler)],
|
||||
skipped_entity => [qw(ContentHandler Handler)],
|
||||
start_cdata => [qw(DocumentHandler LexicalHandler Handler)],
|
||||
end_cdata => [qw(DocumentHandler LexicalHandler Handler)],
|
||||
comment => [qw(DocumentHandler LexicalHandler Handler)],
|
||||
entity_reference => [qw(DocumentHandler Handler)],
|
||||
notation_decl => [qw(DTDHandler Handler)],
|
||||
unparsed_entity_decl => [qw(DTDHandler Handler)],
|
||||
element_decl => [qw(DeclHandler Handler)],
|
||||
attlist_decl => [qw(DTDHandler Handler)],
|
||||
doctype_decl => [qw(DTDHandler Handler)],
|
||||
xml_decl => [qw(DTDHandler Handler)],
|
||||
entity_decl => [qw(DTDHandler Handler)],
|
||||
attribute_decl => [qw(DeclHandler Handler)],
|
||||
internal_entity_decl => [qw(DeclHandler Handler)],
|
||||
external_entity_decl => [qw(DeclHandler Handler)],
|
||||
resolve_entity => [qw(EntityResolver Handler)],
|
||||
start_dtd => [qw(LexicalHandler Handler)],
|
||||
end_dtd => [qw(LexicalHandler Handler)],
|
||||
start_entity => [qw(LexicalHandler Handler)],
|
||||
end_entity => [qw(LexicalHandler Handler)],
|
||||
warning => [qw(ErrorHandler Handler)],
|
||||
error => [qw(ErrorHandler Handler)],
|
||||
fatal_error => [qw(ErrorHandler Handler)],
|
||||
);
|
||||
|
||||
for my $ev (keys %EVENT_SPEC) {
|
||||
$code .= <<" EOTOPCODE";
|
||||
sub $ev {
|
||||
my \$self = shift;
|
||||
if (defined \$self->{Methods}->{'$ev'}) {
|
||||
\$self->{Methods}->{'$ev'}->(\@_);
|
||||
}
|
||||
else {
|
||||
my \$method;
|
||||
my \$callbacks;
|
||||
if (exists \$self->{ParseOptions}) {
|
||||
\$callbacks = \$self->{ParseOptions};
|
||||
}
|
||||
else {
|
||||
\$callbacks = \$self;
|
||||
}
|
||||
if (0) { # dummy to make elsif's below compile
|
||||
}
|
||||
EOTOPCODE
|
||||
|
||||
my ($can_string, $aload_string);
|
||||
for my $h (@{$EVENT_SPEC{$ev}}) {
|
||||
$can_string .= <<" EOCANBLOCK";
|
||||
elsif (defined \$callbacks->{'$h'} and \$method = \$callbacks->{'$h'}->can('$ev') ) {
|
||||
my \$handler = \$callbacks->{'$h'};
|
||||
\$self->{Methods}->{'$ev'} = sub { \$method->(\$handler, \@_) };
|
||||
return \$method->(\$handler, \@_);
|
||||
}
|
||||
EOCANBLOCK
|
||||
$aload_string .= <<" EOALOADBLOCK";
|
||||
elsif (defined \$callbacks->{'$h'}
|
||||
and \$callbacks->{'$h'}->can('AUTOLOAD')
|
||||
and \$callbacks->{'$h'}->can('AUTOLOAD') ne (UNIVERSAL->can('AUTOLOAD') || '')
|
||||
)
|
||||
{
|
||||
my \$res = eval { \$callbacks->{'$h'}->$ev(\@_) };
|
||||
if (\$@) {
|
||||
die \$@;
|
||||
}
|
||||
else {
|
||||
# I think there's a buggette here...
|
||||
# if the first call throws an exception, we don't set it up right.
|
||||
# Not fatal, but we might want to address it.
|
||||
my \$handler = \$callbacks->{'$h'};
|
||||
\$self->{Methods}->{'$ev'} = sub { \$handler->$ev(\@_) };
|
||||
}
|
||||
return \$res;
|
||||
}
|
||||
EOALOADBLOCK
|
||||
}
|
||||
|
||||
$code .= $can_string . $aload_string;
|
||||
|
||||
$code .= <<" EOFALLTHROUGH";
|
||||
else {
|
||||
\$self->{Methods}->{'$ev'} = sub { };
|
||||
}
|
||||
}
|
||||
EOFALLTHROUGH
|
||||
|
||||
$code .= "\n}\n\n";
|
||||
}
|
||||
|
||||
$code .= <<'BODY';
|
||||
#-------------------------------------------------------------------#
|
||||
# Class->new(%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub new {
|
||||
my $proto = shift;
|
||||
my $class = ref($proto) || $proto;
|
||||
my $options = ($#_ == 0) ? shift : { @_ };
|
||||
|
||||
unless ( defined( $options->{Handler} ) or
|
||||
defined( $options->{ContentHandler} ) or
|
||||
defined( $options->{DTDHandler} ) or
|
||||
defined( $options->{DocumentHandler} ) or
|
||||
defined( $options->{LexicalHandler} ) or
|
||||
defined( $options->{ErrorHandler} ) or
|
||||
defined( $options->{DeclHandler} ) ) {
|
||||
|
||||
$options->{Handler} = XML::SAX::Base::NoHandler->new;
|
||||
}
|
||||
|
||||
my $self = bless $options, $class;
|
||||
# turn NS processing on by default
|
||||
$self->set_feature('http://xml.org/sax/features/namespaces', 1);
|
||||
return $self;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->parse(%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub parse {
|
||||
my $self = shift;
|
||||
my $parse_options = $self->get_options(@_);
|
||||
local $self->{ParseOptions} = $parse_options;
|
||||
if ($self->{Parent}) { # calling parse on a filter for some reason
|
||||
return $self->{Parent}->parse($parse_options);
|
||||
}
|
||||
else {
|
||||
my $method;
|
||||
if (defined $parse_options->{Source}{CharacterStream} and $method = $self->can('_parse_characterstream')) {
|
||||
warn("parse charstream???\n");
|
||||
return $method->($self, $parse_options->{Source}{CharacterStream});
|
||||
}
|
||||
elsif (defined $parse_options->{Source}{ByteStream} and $method = $self->can('_parse_bytestream')) {
|
||||
return $method->($self, $parse_options->{Source}{ByteStream});
|
||||
}
|
||||
elsif (defined $parse_options->{Source}{String} and $method = $self->can('_parse_string')) {
|
||||
return $method->($self, $parse_options->{Source}{String});
|
||||
}
|
||||
elsif (defined $parse_options->{Source}{SystemId} and $method = $self->can('_parse_systemid')) {
|
||||
return $method->($self, $parse_options->{Source}{SystemId});
|
||||
}
|
||||
else {
|
||||
die "No _parse_* routine defined on this driver (If it is a filter, remember to set the Parent property. If you call the parse() method, make sure to set a Source. You may want to call parse_uri, parse_string or parse_file instead.) [$self]";
|
||||
}
|
||||
}
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->parse_file(%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub parse_file {
|
||||
my $self = shift;
|
||||
my $file = shift;
|
||||
return $self->parse_uri($file, @_) if ref(\$file) eq 'SCALAR';
|
||||
my $parse_options = $self->get_options(@_);
|
||||
$parse_options->{Source}{ByteStream} = $file;
|
||||
return $self->parse($parse_options);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->parse_uri(%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub parse_uri {
|
||||
my $self = shift;
|
||||
my $file = shift;
|
||||
my $parse_options = $self->get_options(@_);
|
||||
$parse_options->{Source}{SystemId} = $file;
|
||||
return $self->parse($parse_options);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->parse_string(%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub parse_string {
|
||||
my $self = shift;
|
||||
my $string = shift;
|
||||
my $parse_options = $self->get_options(@_);
|
||||
$parse_options->{Source}{String} = $string;
|
||||
return $self->parse($parse_options);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# get_options
|
||||
#-------------------------------------------------------------------#
|
||||
sub get_options {
|
||||
my $self = shift;
|
||||
|
||||
if (@_ == 1) {
|
||||
return { %$self, %{$_[0]} };
|
||||
} else {
|
||||
return { %$self, @_ };
|
||||
}
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# get_features
|
||||
#-------------------------------------------------------------------#
|
||||
sub get_features {
|
||||
return (
|
||||
'http://xml.org/sax/features/external-general-entities' => undef,
|
||||
'http://xml.org/sax/features/external-parameter-entities' => undef,
|
||||
'http://xml.org/sax/features/is-standalone' => undef,
|
||||
'http://xml.org/sax/features/lexical-handler' => undef,
|
||||
'http://xml.org/sax/features/parameter-entities' => undef,
|
||||
'http://xml.org/sax/features/namespaces' => 1,
|
||||
'http://xml.org/sax/features/namespace-prefixes' => 0,
|
||||
'http://xml.org/sax/features/string-interning' => undef,
|
||||
'http://xml.org/sax/features/use-attributes2' => undef,
|
||||
'http://xml.org/sax/features/use-locator2' => undef,
|
||||
'http://xml.org/sax/features/validation' => undef,
|
||||
|
||||
'http://xml.org/sax/properties/dom-node' => undef,
|
||||
'http://xml.org/sax/properties/xml-string' => undef,
|
||||
);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# get_feature
|
||||
#-------------------------------------------------------------------#
|
||||
sub get_feature {
|
||||
my $self = shift;
|
||||
my $feat = shift;
|
||||
|
||||
# check %FEATURES to see if it's there, and return it if so
|
||||
# throw XML::SAX::Exception::NotRecognized if it's not there
|
||||
# throw XML::SAX::Exception::NotSupported if it's there but we
|
||||
# don't support it
|
||||
|
||||
my %features = $self->get_features();
|
||||
if (exists $features{$feat}) {
|
||||
my %supported = map { $_ => 1 } $self->supported_features();
|
||||
if ($supported{$feat}) {
|
||||
return $self->{__PACKAGE__ . "::Features"}{$feat};
|
||||
}
|
||||
throw XML::SAX::Exception::NotSupported(
|
||||
Message => "The feature '$feat' is not supported by " . ref($self),
|
||||
Exception => undef,
|
||||
);
|
||||
}
|
||||
throw XML::SAX::Exception::NotRecognized(
|
||||
Message => "The feature '$feat' is not recognized by " . ref($self),
|
||||
Exception => undef,
|
||||
);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# set_feature
|
||||
#-------------------------------------------------------------------#
|
||||
sub set_feature {
|
||||
my $self = shift;
|
||||
my $feat = shift;
|
||||
my $value = shift;
|
||||
# check %FEATURES to see if it's there, and set it if so
|
||||
# throw XML::SAX::Exception::NotRecognized if it's not there
|
||||
# throw XML::SAX::Exception::NotSupported if it's there but we
|
||||
# don't support it
|
||||
|
||||
my %features = $self->get_features();
|
||||
if (exists $features{$feat}) {
|
||||
my %supported = map { $_ => 1 } $self->supported_features();
|
||||
if ($supported{$feat}) {
|
||||
return $self->{__PACKAGE__ . "::Features"}{$feat} = $value;
|
||||
}
|
||||
throw XML::SAX::Exception::NotSupported(
|
||||
Message => "The feature '$feat' is not supported by " . ref($self),
|
||||
Exception => undef,
|
||||
);
|
||||
}
|
||||
throw XML::SAX::Exception::NotRecognized(
|
||||
Message => "The feature '$feat' is not recognized by " . ref($self),
|
||||
Exception => undef,
|
||||
);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# get_handler and friends
|
||||
#-------------------------------------------------------------------#
|
||||
sub get_handler {
|
||||
my $self = shift;
|
||||
my $handler_type = shift;
|
||||
$handler_type ||= 'Handler';
|
||||
return defined( $self->{$handler_type} ) ? $self->{$handler_type} : undef;
|
||||
}
|
||||
|
||||
sub get_document_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('DocumentHandler', @_);
|
||||
}
|
||||
|
||||
sub get_content_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('ContentHandler', @_);
|
||||
}
|
||||
|
||||
sub get_dtd_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('DTDHandler', @_);
|
||||
}
|
||||
|
||||
sub get_lexical_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('LexicalHandler', @_);
|
||||
}
|
||||
|
||||
sub get_decl_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('DeclHandler', @_);
|
||||
}
|
||||
|
||||
sub get_error_handler {
|
||||
my $self = shift;
|
||||
return $self->get_handler('ErrorHandler', @_);
|
||||
}
|
||||
|
||||
sub get_entity_resolver {
|
||||
my $self = shift;
|
||||
return $self->get_handler('EntityResolver', @_);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# set_handler and friends
|
||||
#-------------------------------------------------------------------#
|
||||
sub set_handler {
|
||||
my $self = shift;
|
||||
my ($new_handler, $handler_type) = reverse @_;
|
||||
$handler_type ||= 'Handler';
|
||||
$self->{Methods} = {} if $self->{Methods};
|
||||
$self->{$handler_type} = $new_handler;
|
||||
$self->{ParseOptions}->{$handler_type} = $new_handler;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub set_document_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('DocumentHandler', @_);
|
||||
}
|
||||
|
||||
sub set_content_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('ContentHandler', @_);
|
||||
}
|
||||
sub set_dtd_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('DTDHandler', @_);
|
||||
}
|
||||
sub set_lexical_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('LexicalHandler', @_);
|
||||
}
|
||||
sub set_decl_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('DeclHandler', @_);
|
||||
}
|
||||
sub set_error_handler {
|
||||
my $self = shift;
|
||||
return $self->set_handler('ErrorHandler', @_);
|
||||
}
|
||||
sub set_entity_resolver {
|
||||
my $self = shift;
|
||||
return $self->set_handler('EntityResolver', @_);
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# supported_features
|
||||
#-------------------------------------------------------------------#
|
||||
sub supported_features {
|
||||
my $self = shift;
|
||||
# Only namespaces are required by all parsers
|
||||
return (
|
||||
'http://xml.org/sax/features/namespaces',
|
||||
);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
sub no_op {
|
||||
# this space intentionally blank
|
||||
}
|
||||
|
||||
|
||||
package XML::SAX::Base::NoHandler;
|
||||
|
||||
# we need a fake handler that doesn't implement anything, this
|
||||
# simplifies the code a lot (though given the recent changes,
|
||||
# it may be better to do without)
|
||||
sub new {
|
||||
#warn "no handler called\n";
|
||||
return bless {};
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
BODY
|
||||
|
||||
$code .= "__END__\n";
|
||||
|
||||
$code .= <<'FOOTER';
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::Base - Base class SAX Drivers and Filters
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
package MyFilter;
|
||||
use XML::SAX::Base;
|
||||
@ISA = ('XML::SAX::Base');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module has a very simple task - to be a base class for PerlSAX
|
||||
drivers and filters. It's default behaviour is to pass the input directly
|
||||
to the output unchanged. It can be useful to use this module as a base class
|
||||
so you don't have to, for example, implement the characters() callback.
|
||||
|
||||
The main advantages that it provides are easy dispatching of events the right
|
||||
way (ie it takes care for you of checking that the handler has implemented
|
||||
that method, or has defined an AUTOLOAD), and the guarantee that filters
|
||||
will pass along events that they aren't implementing to handlers downstream
|
||||
that might nevertheless be interested in them.
|
||||
|
||||
=head1 WRITING SAX DRIVERS AND FILTERS
|
||||
|
||||
The Perl Sax API Reference is at L<http://perl-xml.sourceforge.net/perl-sax/>.
|
||||
|
||||
Writing SAX Filters is tremendously easy: all you need to do is
|
||||
inherit from this module, and define the events you want to handle. A
|
||||
more detailed explanation can be found at
|
||||
http://www.xml.com/pub/a/2001/10/10/sax-filters.html.
|
||||
|
||||
Writing Drivers is equally simple. The one thing you need to pay
|
||||
attention to is B<NOT> to call events yourself (this applies to Filters
|
||||
as well). For instance:
|
||||
|
||||
package MyFilter;
|
||||
use base qw(XML::SAX::Base);
|
||||
|
||||
sub start_element {
|
||||
my $self = shift;
|
||||
my $data = shift;
|
||||
# do something
|
||||
$self->{Handler}->start_element($data); # BAD
|
||||
}
|
||||
|
||||
The above example works well as precisely that: an example. But it has
|
||||
several faults: 1) it doesn't test to see whether the handler defines
|
||||
start_element. Perhaps it doesn't want to see that event, in which
|
||||
case you shouldn't throw it (otherwise it'll die). 2) it doesn't check
|
||||
ContentHandler and then Handler (ie it doesn't look to see that the
|
||||
user hasn't requested events on a specific handler, and if not on the
|
||||
default one), 3) if it did check all that, not only would the code be
|
||||
cumbersome (see this module's source to get an idea) but it would also
|
||||
probably have to check for a DocumentHandler (in case this were SAX1)
|
||||
and for AUTOLOADs potentially defined in all these packages. As you can
|
||||
tell, that would be fairly painful. Instead of going through that,
|
||||
simply remember to use code similar to the following instead:
|
||||
|
||||
package MyFilter;
|
||||
use base qw(XML::SAX::Base);
|
||||
|
||||
sub start_element {
|
||||
my $self = shift;
|
||||
my $data = shift;
|
||||
# do something to filter
|
||||
$self->SUPER::start_element($data); # GOOD (and easy) !
|
||||
}
|
||||
|
||||
This way, once you've done your job you hand the ball back to
|
||||
XML::SAX::Base and it takes care of all those problems for you!
|
||||
|
||||
Note that the above example doesn't apply to filters only, drivers
|
||||
will benefit from the exact same feature.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
A number of methods are defined within this class for the purpose of
|
||||
inheritance. Some probably don't need to be overridden (eg parse_file)
|
||||
but some clearly should be (eg parse). Options for these methods are
|
||||
described in the PerlSAX2 specification available from
|
||||
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/perl-xml/libxml-perl/doc/sax-2.0.html?rev=HEAD&content-type=text/html.
|
||||
|
||||
=over 4
|
||||
|
||||
=item * parse
|
||||
|
||||
The parse method is the main entry point to parsing documents. Internally
|
||||
the parse method will detect what type of "thing" you are parsing, and
|
||||
call the appropriate method in your implementation class. Here is the
|
||||
mapping table of what is in the Source options (see the Perl SAX 2.0
|
||||
specification for the meaning of these values):
|
||||
|
||||
Source Contains parse() calls
|
||||
=============== =============
|
||||
CharacterStream (*) _parse_characterstream($stream, $options)
|
||||
ByteStream _parse_bytestream($stream, $options)
|
||||
String _parse_string($string, $options)
|
||||
SystemId _parse_systemid($string, $options)
|
||||
|
||||
However note that these methods may not be sensible if your driver class
|
||||
is not for parsing XML. An example might be a DBI driver that generates
|
||||
XML/SAX from a database table. If that is the case, you likely want to
|
||||
write your own parse() method.
|
||||
|
||||
Also note that the Source may contain both a PublicId entry, and an
|
||||
Encoding entry. To get at these, examine $options->{Source} as passed
|
||||
to your method.
|
||||
|
||||
(*) A CharacterStream is a filehandle that does not need any encoding
|
||||
translation done on it. This is implemented as a regular filehandle
|
||||
and only works under Perl 5.7.2 or higher using PerlIO. To get a single
|
||||
character, or number of characters from it, use the perl core read()
|
||||
function. To get a single byte from it (or number of bytes), you can
|
||||
use sysread(). The encoding of the stream should be in the Encoding
|
||||
entry for the Source.
|
||||
|
||||
=item * parse_file, parse_uri, parse_string
|
||||
|
||||
These are all convenience variations on parse(), and in fact simply
|
||||
set up the options before calling it. You probably don't need to
|
||||
override these.
|
||||
|
||||
=item * get_options
|
||||
|
||||
This is a convenience method to get options in SAX2 style, or more
|
||||
generically either as hashes or as hashrefs (it returns a hashref).
|
||||
You will probably want to use this method in your own implementations
|
||||
of parse() and of new().
|
||||
|
||||
=item * get_feature, set_feature
|
||||
|
||||
These simply get and set features, and throw the
|
||||
appropriate exceptions defined in the specification if need be.
|
||||
|
||||
If your subclass defines features not defined in this one,
|
||||
then you should override these methods in such a way that they check for
|
||||
your features first, and then call the base class's methods
|
||||
for features not defined by your class. An example would be:
|
||||
|
||||
sub get_feature {
|
||||
my $self = shift;
|
||||
my $feat = shift;
|
||||
if (exists $MY_FEATURES{$feat}) {
|
||||
# handle the feature in various ways
|
||||
}
|
||||
else {
|
||||
return $self->SUPER::get_feature($feat);
|
||||
}
|
||||
}
|
||||
|
||||
Currently this part is unimplemented.
|
||||
|
||||
|
||||
=item * set_handler
|
||||
|
||||
This method takes a handler type (Handler, ContentHandler, etc.) and a
|
||||
handler object as arguments, and changes the current handler for that
|
||||
handler type, while taking care of resetting the internal state that
|
||||
needs to be reset. This allows one to change a handler during parse
|
||||
without running into problems (changing it on the parser object
|
||||
directly will most likely cause trouble).
|
||||
|
||||
=item * set_document_handler, set_content_handler, set_dtd_handler, set_lexical_handler, set_decl_handler, set_error_handler, set_entity_resolver
|
||||
|
||||
These are just simple wrappers around the former method, and take a
|
||||
handler object as their argument. Internally they simply call
|
||||
set_handler with the correct arguments.
|
||||
|
||||
=item * get_handler
|
||||
|
||||
The inverse of set_handler, this method takes a an optional string containing a handler type (DTDHandler,
|
||||
ContentHandler, etc. 'Handler' is used if no type is passed). It returns a reference to the object that implements
|
||||
that class, or undef if that handler type is not set for the current driver/filter.
|
||||
|
||||
=item * get_document_handler, get_content_handler, get_dtd_handler, get_lexical_handler, get_decl_handler,
|
||||
get_error_handler, get_entity_resolver
|
||||
|
||||
These are just simple wrappers around the get_handler() method, and take no arguments. Internally
|
||||
they simply call get_handler with the correct handler type name.
|
||||
|
||||
=back
|
||||
|
||||
It would be rather useless to describe all the methods that this
|
||||
module implements here. They are all the methods supported in SAX1 and
|
||||
SAX2. In case your memory is a little short, here is a list. The
|
||||
apparent duplicates are there so that both versions of SAX can be
|
||||
supported.
|
||||
|
||||
=over 4
|
||||
|
||||
=item * start_document
|
||||
|
||||
=item * end_document
|
||||
|
||||
=item * start_element
|
||||
|
||||
=item * start_document
|
||||
|
||||
=item * end_document
|
||||
|
||||
=item * start_element
|
||||
|
||||
=item * end_element
|
||||
|
||||
=item * characters
|
||||
|
||||
=item * processing_instruction
|
||||
|
||||
=item * ignorable_whitespace
|
||||
|
||||
=item * set_document_locator
|
||||
|
||||
=item * start_prefix_mapping
|
||||
|
||||
=item * end_prefix_mapping
|
||||
|
||||
=item * skipped_entity
|
||||
|
||||
=item * start_cdata
|
||||
|
||||
=item * end_cdata
|
||||
|
||||
=item * comment
|
||||
|
||||
=item * entity_reference
|
||||
|
||||
=item * notation_decl
|
||||
|
||||
=item * unparsed_entity_decl
|
||||
|
||||
=item * element_decl
|
||||
|
||||
=item * attlist_decl
|
||||
|
||||
=item * doctype_decl
|
||||
|
||||
=item * xml_decl
|
||||
|
||||
=item * entity_decl
|
||||
|
||||
=item * attribute_decl
|
||||
|
||||
=item * internal_entity_decl
|
||||
|
||||
=item * external_entity_decl
|
||||
|
||||
=item * resolve_entity
|
||||
|
||||
=item * start_dtd
|
||||
|
||||
=item * end_dtd
|
||||
|
||||
=item * start_entity
|
||||
|
||||
=item * end_entity
|
||||
|
||||
=item * warning
|
||||
|
||||
=item * error
|
||||
|
||||
=item * fatal_error
|
||||
|
||||
=back
|
||||
|
||||
=head1 TODO
|
||||
|
||||
- more tests
|
||||
- conform to the "SAX Filters" and "Java and DOM compatibility"
|
||||
sections of the SAX2 document.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Kip Hampton (khampton@totalcinema.com) did most of the work, after porting
|
||||
it from XML::Filter::Base.
|
||||
|
||||
Robin Berjon (robin@knowscape.com) pitched in with patches to make it
|
||||
usable as a base for drivers as well as filters, along with other patches.
|
||||
|
||||
Matt Sergeant (matt@sergeant.org) wrote the original XML::Filter::Base,
|
||||
and patched a few things here and there, and imported it into
|
||||
the XML::SAX distribution.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<XML::SAX>
|
||||
|
||||
=cut
|
||||
|
||||
FOOTER
|
||||
|
||||
|
||||
return $code;
|
||||
}
|
||||
|
||||
|
||||
sub write_xml_sax_base {
|
||||
confirm_forced_update();
|
||||
|
||||
my $path = File::Spec->catfile("lib", "XML", "SAX", "Base.pm");
|
||||
save_original_xml_sax_base($path);
|
||||
|
||||
my $code = build_xml_sax_base();
|
||||
$code = add_version_stanzas($code);
|
||||
|
||||
open my $fh, ">", $path or die "Cannot write $path: $!";
|
||||
print $fh $code;
|
||||
close $fh or die "Error writing $path: $!";
|
||||
print "Wrote $path\n";
|
||||
}
|
||||
|
||||
|
||||
sub confirm_forced_update {
|
||||
return if grep { $_ eq '--force' } @ARGV;
|
||||
|
||||
print <<'EOF';
|
||||
*** WARNING ***
|
||||
|
||||
The BuildSAXBase.pl script is used to generate the lib/XML/SAX/Base.pm file.
|
||||
However a pre-generated version of Base.pm is included in the distribution
|
||||
so you do not need to run this script unless you intend to modify the code.
|
||||
|
||||
You must use the --force option to deliberately overwrite the distributed
|
||||
version of lib/XML/SAX/Base.pm
|
||||
|
||||
EOF
|
||||
|
||||
exit;
|
||||
}
|
||||
|
||||
|
||||
sub save_original_xml_sax_base {
|
||||
my($path) = @_;
|
||||
|
||||
return unless -e $path;
|
||||
(my $save_path = $path) =~ s{Base}{Base-orig};
|
||||
return if -e $save_path;
|
||||
print "Saving $path to $save_path\n";
|
||||
rename($path, $save_path);
|
||||
}
|
||||
|
||||
|
||||
sub add_version_stanzas {
|
||||
my($code) = @_;
|
||||
|
||||
my $version = get_xml_sax_base_version();
|
||||
$code =~ s<^(package\s+(\w[:\w]+).*?\n)>
|
||||
<${1}BEGIN {\n \$${2}::VERSION = '$version';\n}\n>mg;
|
||||
return $code;
|
||||
}
|
||||
|
||||
|
||||
sub get_xml_sax_base_version {
|
||||
open my $fh, '<', 'dist.ini' or die "open(<dist.ini): $!";
|
||||
while(<$fh>) {
|
||||
m{^\s*version\s*=\s*(\S+)} && return $1;
|
||||
}
|
||||
die "Failed to find version in dist.ini";
|
||||
}
|
||||
|
||||
134
database/perl/vendor/lib/XML/SAX/DocumentLocator.pm
vendored
Normal file
134
database/perl/vendor/lib/XML/SAX/DocumentLocator.pm
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::DocumentLocator;
|
||||
use strict;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my %object;
|
||||
tie %object, $class, @_;
|
||||
|
||||
return bless \%object, $class;
|
||||
}
|
||||
|
||||
sub TIEHASH {
|
||||
my $class = shift;
|
||||
my ($pubmeth, $sysmeth, $linemeth, $colmeth, $encmeth, $xmlvmeth) = @_;
|
||||
return bless {
|
||||
pubmeth => $pubmeth,
|
||||
sysmeth => $sysmeth,
|
||||
linemeth => $linemeth,
|
||||
colmeth => $colmeth,
|
||||
encmeth => $encmeth,
|
||||
xmlvmeth => $xmlvmeth,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub FETCH {
|
||||
my ($self, $key) = @_;
|
||||
my $method;
|
||||
if ($key eq 'PublicId') {
|
||||
$method = $self->{pubmeth};
|
||||
}
|
||||
elsif ($key eq 'SystemId') {
|
||||
$method = $self->{sysmeth};
|
||||
}
|
||||
elsif ($key eq 'LineNumber') {
|
||||
$method = $self->{linemeth};
|
||||
}
|
||||
elsif ($key eq 'ColumnNumber') {
|
||||
$method = $self->{colmeth};
|
||||
}
|
||||
elsif ($key eq 'Encoding') {
|
||||
$method = $self->{encmeth};
|
||||
}
|
||||
elsif ($key eq 'XMLVersion') {
|
||||
$method = $self->{xmlvmeth};
|
||||
}
|
||||
if ($method) {
|
||||
my $value = $method->($key);
|
||||
return $value;
|
||||
}
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub EXISTS {
|
||||
my ($self, $key) = @_;
|
||||
if ($key =~ /^(PublicId|SystemId|LineNumber|ColumnNumber|Encoding|XMLVersion)$/) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub STORE {
|
||||
my ($self, $key, $value) = @_;
|
||||
}
|
||||
|
||||
sub DELETE {
|
||||
my ($self, $key) = @_;
|
||||
}
|
||||
|
||||
sub CLEAR {
|
||||
my ($self) = @_;
|
||||
}
|
||||
|
||||
sub FIRSTKEY {
|
||||
my ($self) = @_;
|
||||
# assignment resets.
|
||||
$self->{keys} = {
|
||||
PublicId => 1,
|
||||
SystemId => 1,
|
||||
LineNumber => 1,
|
||||
ColumnNumber => 1,
|
||||
Encoding => 1,
|
||||
XMLVersion => 1,
|
||||
};
|
||||
return each %{$self->{keys}};
|
||||
}
|
||||
|
||||
sub NEXTKEY {
|
||||
my ($self, $lastkey) = @_;
|
||||
return each %{$self->{keys}};
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::DocumentLocator - Helper class for document locators
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
my $locator = XML::SAX::DocumentLocator->new(
|
||||
sub { $object->get_public_id },
|
||||
sub { $object->get_system_id },
|
||||
sub { $reader->current_line },
|
||||
sub { $reader->current_column },
|
||||
sub { $reader->get_encoding },
|
||||
sub { $reader->get_xml_version },
|
||||
);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module gives you a tied hash reference that calls the
|
||||
specified closures when asked for PublicId, SystemId,
|
||||
LineNumber and ColumnNumber.
|
||||
|
||||
It is useful for writing SAX Parsers so that you don't have
|
||||
to constantly update the line numbers in a hash reference on
|
||||
the object you pass to set_document_locator(). See the source
|
||||
code for XML::SAX::PurePerl for a usage example.
|
||||
|
||||
=head1 API
|
||||
|
||||
There is only 1 method: C<new>. Simply pass it a list of
|
||||
closures that when called will return the PublicId, the
|
||||
SystemId, the LineNumber, the ColumnNumber, the Encoding
|
||||
and the XMLVersion respectively.
|
||||
|
||||
The closures are passed a single parameter, the key being
|
||||
requested. But you're free to ignore that.
|
||||
|
||||
=cut
|
||||
|
||||
126
database/perl/vendor/lib/XML/SAX/Exception.pm
vendored
Normal file
126
database/perl/vendor/lib/XML/SAX/Exception.pm
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
package XML::SAX::Exception;
|
||||
$XML::SAX::Exception::VERSION = '1.09';
|
||||
use strict;
|
||||
|
||||
use overload '""' => "stringify",
|
||||
'fallback' => 1;
|
||||
|
||||
use vars qw($StackTrace);
|
||||
|
||||
use Carp;
|
||||
|
||||
$StackTrace = $ENV{XML_DEBUG} || 0;
|
||||
|
||||
# Other exception classes:
|
||||
|
||||
@XML::SAX::Exception::NotRecognized::ISA = ('XML::SAX::Exception');
|
||||
@XML::SAX::Exception::NotSupported::ISA = ('XML::SAX::Exception');
|
||||
@XML::SAX::Exception::Parse::ISA = ('XML::SAX::Exception');
|
||||
|
||||
|
||||
sub throw {
|
||||
my $class = shift;
|
||||
if (ref($class)) {
|
||||
die $class;
|
||||
}
|
||||
die $class->new(@_);
|
||||
}
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my %opts = @_;
|
||||
confess "Invalid options: " . join(', ', keys %opts) unless exists $opts{Message};
|
||||
|
||||
bless { ($StackTrace ? (StackTrace => stacktrace()) : ()), %opts },
|
||||
$class;
|
||||
}
|
||||
|
||||
sub stringify {
|
||||
my $self = shift;
|
||||
local $^W;
|
||||
my $error;
|
||||
if (exists $self->{LineNumber}) {
|
||||
$error = $self->{Message} . " [Ln: " . $self->{LineNumber} .
|
||||
", Col: " . $self->{ColumnNumber} . "]";
|
||||
}
|
||||
else {
|
||||
$error = $self->{Message};
|
||||
}
|
||||
if ($StackTrace) {
|
||||
$error .= stackstring($self->{StackTrace});
|
||||
}
|
||||
$error .= "\n";
|
||||
return $error;
|
||||
}
|
||||
|
||||
sub stacktrace {
|
||||
my $i = 2;
|
||||
my @fulltrace;
|
||||
while (my @trace = caller($i++)) {
|
||||
my %hash;
|
||||
@hash{qw(Package Filename Line)} = @trace[0..2];
|
||||
push @fulltrace, \%hash;
|
||||
}
|
||||
return \@fulltrace;
|
||||
}
|
||||
|
||||
sub stackstring {
|
||||
my $stacktrace = shift;
|
||||
my $string = "\nFrom:\n";
|
||||
foreach my $current (@$stacktrace) {
|
||||
$string .= $current->{Filename} . " Line: " . $current->{Line} . "\n";
|
||||
}
|
||||
return $string;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::Exception - Exception classes for XML::SAX
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
throw XML::SAX::Exception::NotSupported(
|
||||
Message => "The foo feature is not supported",
|
||||
);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module is the base class for all SAX Exceptions, those defined in
|
||||
the spec as well as those that one may create for one's own SAX errors.
|
||||
|
||||
There are three subclasses included, corresponding to those of the SAX
|
||||
spec:
|
||||
|
||||
XML::SAX::Exception::NotSupported
|
||||
XML::SAX::Exception::NotRecognized
|
||||
XML::SAX::Exception::Parse
|
||||
|
||||
Use them wherever you want, and as much as possible when you encounter
|
||||
such errors. SAX is meant to use exceptions as much as possible to
|
||||
flag problems.
|
||||
|
||||
=head1 CREATING NEW EXCEPTION CLASSES
|
||||
|
||||
All you need to do to create a new exception class is:
|
||||
|
||||
@XML::SAX::Exception::MyException::ISA = ('XML::SAX::Exception')
|
||||
|
||||
The given package doesn't need to exist, it'll behave correctly this
|
||||
way. If your exception refines an existing exception class, then you
|
||||
may also inherit from that instead of from the base class.
|
||||
|
||||
=head1 THROWING EXCEPTIONS
|
||||
|
||||
This is as simple as exemplified in the SYNOPSIS. In fact, there's
|
||||
nothing more to know. All you have to do is:
|
||||
|
||||
throw XML::SAX::Exception::MyException( Message => 'Something went wrong' );
|
||||
|
||||
and voila, you've thrown an exception which can be caught in an eval block.
|
||||
|
||||
=cut
|
||||
|
||||
597
database/perl/vendor/lib/XML/SAX/Expat.pm
vendored
Normal file
597
database/perl/vendor/lib/XML/SAX/Expat.pm
vendored
Normal file
@@ -0,0 +1,597 @@
|
||||
|
||||
###
|
||||
# XML::SAX::Expat - SAX2 Driver for Expat (XML::Parser)
|
||||
# Originally by Robin Berjon
|
||||
###
|
||||
|
||||
package XML::SAX::Expat;
|
||||
use strict;
|
||||
use base qw(XML::SAX::Base);
|
||||
use XML::NamespaceSupport qw();
|
||||
use XML::Parser qw();
|
||||
|
||||
use vars qw($VERSION);
|
||||
$VERSION = '0.51';
|
||||
|
||||
|
||||
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
|
||||
#`,`, Variations on parse `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
|
||||
#```````````````````````````````````````````````````````````````````#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# CharacterStream
|
||||
#-------------------------------------------------------------------#
|
||||
sub _parse_characterstream {
|
||||
my $p = shift;
|
||||
my $xml = shift;
|
||||
my $opt = shift;
|
||||
|
||||
my $expat = $p->_create_parser($opt);
|
||||
my $result = $expat->parse($xml);
|
||||
$p->_cleanup;
|
||||
return $result;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# ByteStream
|
||||
#-------------------------------------------------------------------#
|
||||
sub _parse_bytestream {
|
||||
my $p = shift;
|
||||
my $xml = shift;
|
||||
my $opt = shift;
|
||||
|
||||
my $expat = $p->_create_parser($opt);
|
||||
my $result = $expat->parse($xml);
|
||||
$p->_cleanup;
|
||||
return $result;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# String
|
||||
#-------------------------------------------------------------------#
|
||||
sub _parse_string {
|
||||
my $p = shift;
|
||||
my $xml = shift;
|
||||
my $opt = shift;
|
||||
|
||||
my $expat = $p->_create_parser($opt);
|
||||
my $result = $expat->parse($xml);
|
||||
$p->_cleanup;
|
||||
return $result;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# SystemId
|
||||
#-------------------------------------------------------------------#
|
||||
sub _parse_systemid {
|
||||
my $p = shift;
|
||||
my $xml = shift;
|
||||
my $opt = shift;
|
||||
|
||||
my $expat = $p->_create_parser($opt);
|
||||
my $result = $expat->parsefile($xml);
|
||||
$p->_cleanup;
|
||||
return $result;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->_create_parser(\%options)
|
||||
#-------------------------------------------------------------------#
|
||||
sub _create_parser {
|
||||
my $self = shift;
|
||||
my $opt = shift;
|
||||
|
||||
die "ParserReference: parser instance ($self) already parsing\n"
|
||||
if $self->{_InParse};
|
||||
|
||||
my $featUri = 'http://xml.org/sax/features/';
|
||||
my $ppe = ($self->get_feature($featUri . 'external-general-entities') or
|
||||
$self->get_feature($featUri . 'external-parameter-entities') ) ? 1 : 0;
|
||||
|
||||
my $expat = XML::Parser->new( ParseParamEnt => $ppe );
|
||||
$expat->{__XSE} = $self;
|
||||
$expat->setHandlers(
|
||||
Init => \&_handle_init,
|
||||
Final => \&_handle_final,
|
||||
Start => \&_handle_start,
|
||||
End => \&_handle_end,
|
||||
Char => \&_handle_char,
|
||||
Comment => \&_handle_comment,
|
||||
Proc => \&_handle_proc,
|
||||
CdataStart => \&_handle_start_cdata,
|
||||
CdataEnd => \&_handle_end_cdata,
|
||||
Unparsed => \&_handle_unparsed_entity,
|
||||
Notation => \&_handle_notation_decl,
|
||||
#ExternEnt
|
||||
#ExternEntFin
|
||||
Entity => \&_handle_entity_decl,
|
||||
Element => \&_handle_element_decl,
|
||||
Attlist => \&_handle_attr_decl,
|
||||
Doctype => \&_handle_start_doctype,
|
||||
DoctypeFin => \&_handle_end_doctype,
|
||||
XMLDecl => \&_handle_xml_decl,
|
||||
);
|
||||
|
||||
$self->{_InParse} = 1;
|
||||
$self->{_NodeStack} = [];
|
||||
$self->{_NSStack} = [];
|
||||
$self->{_NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
|
||||
$self->{_started} = 0;
|
||||
|
||||
return $expat;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# $p->_cleanup
|
||||
#-------------------------------------------------------------------#
|
||||
sub _cleanup {
|
||||
my $self = shift;
|
||||
|
||||
$self->{_InParse} = 0;
|
||||
delete $self->{_NodeStack};
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
|
||||
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
|
||||
#`,`, Expat Handlers ,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
|
||||
#```````````````````````````````````````````````````````````````````#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_init
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_init {
|
||||
#my $self = shift()->{__XSE};
|
||||
|
||||
#my $document = {};
|
||||
#push @{$self->{_NodeStack}}, $document;
|
||||
#$self->SUPER::start_document($document);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_final
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_final {
|
||||
my $self = shift()->{__XSE};
|
||||
|
||||
#my $document = pop @{$self->{_NodeStack}};
|
||||
return $self->SUPER::end_document({});
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_start
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_start {
|
||||
my $self = shift()->{__XSE};
|
||||
my $e_name = shift;
|
||||
my %attr = @_;
|
||||
|
||||
# start_document data
|
||||
$self->_handle_start_document({}) unless $self->{_started};
|
||||
|
||||
# take care of namespaces
|
||||
my $nsh = $self->{_NSHelper};
|
||||
$nsh->push_context;
|
||||
my @new_ns;
|
||||
for my $k (grep !index($_, 'xmlns'), keys %attr) {
|
||||
$k =~ m/^xmlns(:(.*))?$/;
|
||||
my $prefix = $2 || '';
|
||||
$nsh->declare_prefix($prefix, $attr{$k});
|
||||
my $ns = {
|
||||
Prefix => $prefix,
|
||||
NamespaceURI => $attr{$k},
|
||||
};
|
||||
push @new_ns, $ns;
|
||||
$self->SUPER::start_prefix_mapping($ns);
|
||||
}
|
||||
push @{$self->{_NSStack}}, \@new_ns;
|
||||
|
||||
|
||||
# create the attributes
|
||||
my %saxattr;
|
||||
map {
|
||||
my ($ns,$prefix,$lname) = $nsh->process_attribute_name($_);
|
||||
$saxattr{'{' . ($ns || '') . '}' . $lname} = {
|
||||
Name => $_,
|
||||
LocalName => $lname || '',
|
||||
Prefix => $prefix || '',
|
||||
Value => $attr{$_},
|
||||
NamespaceURI => $ns || '',
|
||||
};
|
||||
} keys %attr;
|
||||
|
||||
|
||||
# now the element
|
||||
my ($ns,$prefix,$lname) = $nsh->process_element_name($e_name);
|
||||
my $element = {
|
||||
Name => $e_name,
|
||||
LocalName => $lname || '',
|
||||
Prefix => $prefix || '',
|
||||
NamespaceURI => $ns || '',
|
||||
Attributes => \%saxattr,
|
||||
};
|
||||
|
||||
push @{$self->{_NodeStack}}, $element;
|
||||
$self->SUPER::start_element($element);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_end
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_end {
|
||||
my $self = shift()->{__XSE};
|
||||
|
||||
my %element = %{pop @{$self->{_NodeStack}}};
|
||||
delete $element{Attributes};
|
||||
$self->SUPER::end_element(\%element);
|
||||
|
||||
my $prev_ns = pop @{$self->{_NSStack}};
|
||||
for my $ns (@$prev_ns) {
|
||||
$self->SUPER::end_prefix_mapping( { %$ns } );
|
||||
}
|
||||
$self->{_NSHelper}->pop_context;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_char
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_char {
|
||||
$_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
|
||||
$_[0]->{__XSE}->SUPER::characters({ Data => $_[1] });
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_comment
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_comment {
|
||||
$_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
|
||||
$_[0]->{__XSE}->SUPER::comment({ Data => $_[1] });
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_proc
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_proc {
|
||||
$_[0]->{__XSE}->_handle_start_document({}) unless $_[0]->{__XSE}->{_started};
|
||||
$_[0]->{__XSE}->SUPER::processing_instruction({ Target => $_[1], Data => $_[2] });
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_start_cdata
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_start_cdata {
|
||||
$_[0]->{__XSE}->SUPER::start_cdata( {} );
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_end_cdata
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_end_cdata {
|
||||
$_[0]->{__XSE}->SUPER::end_cdata( {} );
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_xml_decl
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_xml_decl {
|
||||
my $self = shift()->{__XSE};
|
||||
my $version = shift;
|
||||
my $encoding = shift;
|
||||
my $standalone = shift;
|
||||
|
||||
if (not defined $standalone) { $standalone = ''; }
|
||||
elsif ($standalone) { $standalone = 'yes'; }
|
||||
else { $standalone = 'no'; }
|
||||
my $xd = {
|
||||
Version => $version,
|
||||
Encoding => $encoding,
|
||||
Standalone => $standalone,
|
||||
};
|
||||
#$self->SUPER::xml_decl($xd);
|
||||
$self->_handle_start_document($xd);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_notation_decl
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_notation_decl {
|
||||
my $self = shift()->{__XSE};
|
||||
my $notation = shift;
|
||||
shift;
|
||||
my $system = shift;
|
||||
my $public = shift;
|
||||
|
||||
my $not = {
|
||||
Name => $notation,
|
||||
PublicId => $public,
|
||||
SystemId => $system,
|
||||
};
|
||||
$self->SUPER::notation_decl($not);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_unparsed_entity
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_unparsed_entity {
|
||||
my $self = shift()->{__XSE};
|
||||
my $name = shift;
|
||||
my $system = shift;
|
||||
my $public = shift;
|
||||
my $notation = shift;
|
||||
|
||||
my $ue = {
|
||||
Name => $name,
|
||||
PublicId => $public,
|
||||
SystemId => $system,
|
||||
Notation => $notation,
|
||||
};
|
||||
$self->SUPER::unparsed_entity_decl($ue);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_element_decl
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_element_decl {
|
||||
$_[0]->{__XSE}->SUPER::element_decl({ Name => $_[1], Model => "$_[2]" });
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_attr_decl
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_attr_decl {
|
||||
my $self = shift()->{__XSE};
|
||||
my $ename = shift;
|
||||
my $aname = shift;
|
||||
my $type = shift;
|
||||
my $default = shift;
|
||||
my $fixed = shift;
|
||||
|
||||
my ($vd, $value);
|
||||
if ($fixed) {
|
||||
$vd = '#FIXED';
|
||||
$default =~ s/^(?:"|')//; #"
|
||||
$default =~ s/(?:"|')$//; #"
|
||||
$value = $default;
|
||||
}
|
||||
else {
|
||||
if ($default =~ m/^#/) {
|
||||
$vd = $default;
|
||||
$value = '';
|
||||
}
|
||||
else {
|
||||
$vd = ''; # maybe there's a default ?
|
||||
$default =~ s/^(?:"|')//; #"
|
||||
$default =~ s/(?:"|')$//; #"
|
||||
$value = $default;
|
||||
}
|
||||
}
|
||||
|
||||
my $at = {
|
||||
eName => $ename,
|
||||
aName => $aname,
|
||||
Type => $type,
|
||||
ValueDefault => $vd,
|
||||
Value => $value,
|
||||
};
|
||||
$self->SUPER::attribute_decl($at);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_entity_decl
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_entity_decl {
|
||||
my $self = shift()->{__XSE};
|
||||
my $name = shift;
|
||||
my $val = shift;
|
||||
my $sys = shift;
|
||||
my $pub = shift;
|
||||
my $ndata = shift;
|
||||
my $isprm = shift;
|
||||
|
||||
# deal with param ents
|
||||
if ($isprm) {
|
||||
$name = '%' . $name;
|
||||
}
|
||||
|
||||
# int vs ext
|
||||
if ($val) {
|
||||
my $ent = {
|
||||
Name => $name,
|
||||
Value => $val,
|
||||
};
|
||||
$self->SUPER::internal_entity_decl($ent);
|
||||
}
|
||||
else {
|
||||
my $ent = {
|
||||
Name => $name,
|
||||
PublicId => $pub || '',
|
||||
SystemId => $sys,
|
||||
};
|
||||
$self->SUPER::external_entity_decl($ent);
|
||||
}
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_start_doctype
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_start_doctype {
|
||||
my $self = shift()->{__XSE};
|
||||
my $name = shift;
|
||||
my $sys = shift;
|
||||
my $pub = shift;
|
||||
|
||||
$self->_handle_start_document({}) unless $self->{_started};
|
||||
|
||||
my $dtd = {
|
||||
Name => $name,
|
||||
SystemId => $sys,
|
||||
PublicId => $pub,
|
||||
};
|
||||
$self->SUPER::start_dtd($dtd);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_end_doctype
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_end_doctype {
|
||||
$_[0]->{__XSE}->SUPER::end_dtd( {} );
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _handle_start_document
|
||||
#-------------------------------------------------------------------#
|
||||
sub _handle_start_document {
|
||||
$_[0]->SUPER::start_document($_[1]);
|
||||
$_[0]->{_started} = 1;
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# supported_features
|
||||
#-------------------------------------------------------------------#
|
||||
sub supported_features {
|
||||
return (
|
||||
$_[0]->SUPER::supported_features,
|
||||
'http://xml.org/sax/features/external-general-entities',
|
||||
'http://xml.org/sax/features/external-parameter-entities',
|
||||
);
|
||||
}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
|
||||
#`,`, Private Helpers `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
|
||||
#```````````````````````````````````````````````````````````````````#
|
||||
|
||||
#-------------------------------------------------------------------#
|
||||
# _create_node
|
||||
#-------------------------------------------------------------------#
|
||||
#sub _create_node {
|
||||
# shift;
|
||||
# # this may check for a factory later
|
||||
# return {@_};
|
||||
#}
|
||||
#-------------------------------------------------------------------#
|
||||
|
||||
|
||||
1;
|
||||
#,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,#
|
||||
#`,`, Documentation `,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,`,#
|
||||
#```````````````````````````````````````````````````````````````````#
|
||||
|
||||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::Expat - SAX2 Driver for Expat (XML::Parser)
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::SAX::Expat;
|
||||
use XML::SAX::MyFooHandler;
|
||||
my $h = XML::SAX::MyFooHandler->new;
|
||||
my $p = XML::SAX::Expat->new(Handler => $h);
|
||||
$p->parse_file('/path/to/foo.xml');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This is an implementation of a SAX2 driver sitting on top of Expat
|
||||
(XML::Parser) which Ken MacLeod posted to perl-xml and which I have
|
||||
updated.
|
||||
|
||||
It is still incomplete, though most of the basic SAX2 events should be
|
||||
available. The SAX2 spec is currently available from L<http://perl-xml.sourceforge.net/perl-sax/>.
|
||||
|
||||
A more friendly URL as well as a PODification of the spec are in the
|
||||
works.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
The methods defined in this class correspond to those listed in the
|
||||
PerlSAX2 specification, available above.
|
||||
|
||||
=head1 FEATURES AND CAVEATS
|
||||
|
||||
=over 2
|
||||
|
||||
=item supported_features
|
||||
|
||||
Returns:
|
||||
|
||||
* http://xml.org/sax/features/external-general-entities
|
||||
* http://xml.org/sax/features/external-parameter-entities
|
||||
* [ Features supported by ancestors ]
|
||||
|
||||
Turning one of the first two on also turns the other on (this maps
|
||||
to the XML::Parser ParseParamEnts option). This may be fixed in the
|
||||
future, so don't rely on this behaviour.
|
||||
|
||||
=back
|
||||
|
||||
=head1 MISSING PARTS
|
||||
|
||||
XML::Parser has no listed callbacks for the following events, which
|
||||
are therefore not presently generated (ways may be found in the
|
||||
future):
|
||||
|
||||
* ignorable_whitespace
|
||||
* skipped_entity
|
||||
* start_entity / end_entity
|
||||
* resolve_entity
|
||||
|
||||
Ways of signalling them are welcome. In addition to those,
|
||||
set_document_locator is not yet called.
|
||||
|
||||
=head1 TODO
|
||||
|
||||
- reuse Ken's tests and add more
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Robin Berjon; stolen from Ken Macleod, ken@bitsko.slc.ut.us, and with
|
||||
suggestions and feedback from perl-xml. Currently maintained by Bjoern
|
||||
Hoehrmann, L<http://bjoern.hoehrmann.de/>.
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2001-2008 Robin Berjon. All rights reserved. This program is
|
||||
free software; you can redistribute it and/or modify it under the same
|
||||
terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
XML::Parser::PerlSAX
|
||||
|
||||
=cut
|
||||
407
database/perl/vendor/lib/XML/SAX/Intro.pod
vendored
Normal file
407
database/perl/vendor/lib/XML/SAX/Intro.pod
vendored
Normal file
@@ -0,0 +1,407 @@
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::Intro - An Introduction to SAX Parsing with Perl
|
||||
|
||||
=head1 Introduction
|
||||
|
||||
XML::SAX is a new way to work with XML Parsers in Perl. In this article
|
||||
we'll discuss why you should be using SAX, why you should be using
|
||||
XML::SAX, and we'll see some of the finer implementation details. The
|
||||
text below assumes some familiarity with callback, or push based
|
||||
parsing, but if you are unfamiliar with these techniques then a good
|
||||
place to start is Kip Hampton's excellent series of articles on XML.com.
|
||||
|
||||
=head1 Replacing XML::Parser
|
||||
|
||||
The de-facto way of parsing XML under perl is to use Larry Wall and
|
||||
Clark Cooper's XML::Parser. This module is a Perl and XS wrapper around
|
||||
the expat XML parser library by James Clark. It has been a hugely
|
||||
successful project, but suffers from a couple of rather major flaws.
|
||||
Firstly it is a proprietary API, designed before the SAX API was
|
||||
conceived, which means that it is not easily replaceable by other
|
||||
streaming parsers. Secondly it's callbacks are subrefs. This doesn't
|
||||
sound like much of an issue, but unfortunately leads to code like:
|
||||
|
||||
sub handle_start {
|
||||
my ($e, $el, %attrs) = @_;
|
||||
if ($el eq 'foo') {
|
||||
$e->{inside_foo}++; # BAD! $e is an XML::Parser::Expat object.
|
||||
}
|
||||
}
|
||||
|
||||
As you can see, we're using the $e object to hold our state
|
||||
information, which is a bad idea because we don't own that object - we
|
||||
didn't create it. It's an internal object of XML::Parser, that happens
|
||||
to be a hashref. We could all too easily overwrite XML::Parser internal
|
||||
state variables by using this, or Clark could change it to an array ref
|
||||
(not that he would, because it would break so much code, but he could).
|
||||
|
||||
The only way currently with XML::Parser to safely maintain state is to
|
||||
use a closure:
|
||||
|
||||
my $state = MyState->new();
|
||||
$parser->setHandlers(Start => sub { handle_start($state, @_) });
|
||||
|
||||
This closure traps the $state variable, which now gets passed as the
|
||||
first parameter to your callback. Unfortunately very few people use
|
||||
this technique, as it is not documented in the XML::Parser POD files.
|
||||
|
||||
Another reason you might not want to use XML::Parser is because you
|
||||
need some feature that it doesn't provide (such as validation), or you
|
||||
might need to use a library that doesn't use expat, due to it not being
|
||||
installed on your system, or due to having a restrictive ISP. Using SAX
|
||||
allows you to work around these restrictions.
|
||||
|
||||
=head1 Introducing SAX
|
||||
|
||||
SAX stands for the Simple API for XML. And simple it really is.
|
||||
Constructing a SAX parser and passing events to handlers is done as
|
||||
simply as:
|
||||
|
||||
use XML::SAX;
|
||||
use MySAXHandler;
|
||||
|
||||
my $parser = XML::SAX::ParserFactory->parser(
|
||||
Handler => MySAXHandler->new
|
||||
);
|
||||
|
||||
$parser->parse_uri("foo.xml");
|
||||
|
||||
The important concept to grasp here is that SAX uses a factory class
|
||||
called XML::SAX::ParserFactory to create a new parser instance. The
|
||||
reason for this is so that you can support other underlying
|
||||
parser implementations for different feature sets. This is one thing
|
||||
that XML::Parser has always sorely lacked.
|
||||
|
||||
In the code above we see the parse_uri method used, but we could
|
||||
have equally well
|
||||
called parse_file, parse_string, or parse(). Please see XML::SAX::Base
|
||||
for what these methods take as parameters, but don't be fooled into
|
||||
believing parse_file takes a filename. No, it takes a file handle, a
|
||||
glob, or a subclass of IO::Handle. Beware.
|
||||
|
||||
SAX works very similarly to XML::Parser's default callback method,
|
||||
except it has one major difference: rather than setting individual
|
||||
callbacks, you create a new class in which to receive the callbacks.
|
||||
Each callback is called as a method call on an instance of that handler
|
||||
class. An example will best demonstrate this:
|
||||
|
||||
package MySAXHandler;
|
||||
use base qw(XML::SAX::Base);
|
||||
|
||||
sub start_document {
|
||||
my ($self, $doc) = @_;
|
||||
# process document start event
|
||||
}
|
||||
|
||||
sub start_element {
|
||||
my ($self, $el) = @_;
|
||||
# process element start event
|
||||
}
|
||||
|
||||
Now, when we instantiate this as above, and parse some XML with this as
|
||||
the handler, the methods start_document and start_element will be
|
||||
called as method calls, so this would be the equivalent of directly
|
||||
calling:
|
||||
|
||||
$object->start_element($el);
|
||||
|
||||
Notice how this is different to XML::Parser's calling style, which
|
||||
calls:
|
||||
|
||||
start_element($e, $name, %attribs);
|
||||
|
||||
It's the difference between function calling and method calling which
|
||||
allows you to subclass SAX handlers which contributes to SAX being a
|
||||
powerful solution.
|
||||
|
||||
As you can see, unlike XML::Parser, we have to define a new package in
|
||||
which to do our processing (there are hacks you can do to make this
|
||||
uneccessary, but I'll leave figuring those out to the experts). The
|
||||
biggest benefit of this is that you maintain your own state variable
|
||||
($self in the above example) thus freeing you of the concerns listed
|
||||
above. It is also an improvement in maintainability - you can place the
|
||||
code in a separate file if you wish to, and your callback methods are
|
||||
always called the same thing, rather than having to choose a suitable
|
||||
name for them as you had to with XML::Parser. This is an obvious win.
|
||||
|
||||
SAX parsers are also very flexible in how you pass a handler to them.
|
||||
You can use a constructor parameter as we saw above, or we can pass the
|
||||
handler directly in the call to one of the parse methods:
|
||||
|
||||
$parser->parse(Handler => $handler,
|
||||
Source => { SystemId => "foo.xml" });
|
||||
# or...
|
||||
$parser->parse_file($fh, Handler => $handler);
|
||||
|
||||
This flexibility allows for one parser to be used in many different
|
||||
scenarios throughout your script (though one shouldn't feel pressure to
|
||||
use this method, as parser construction is generally not a time
|
||||
consuming process).
|
||||
|
||||
=head1 Callback Parameters
|
||||
|
||||
The only other thing you need to know to understand basic SAX is the
|
||||
structure of the parameters passed to each of the callbacks. In
|
||||
XML::Parser, all parameters are passed as multiple options to the
|
||||
callbacks, so for example the Start callback would be called as
|
||||
my_start($e, $name, %attributes), and the PI callback would be called
|
||||
as my_processing_instruction($e, $target, $data). In SAX, every
|
||||
callback is passed a hash reference, containing entries that define our
|
||||
"node". The key callbacks and the structures they receive are:
|
||||
|
||||
=head2 start_element
|
||||
|
||||
The start_element handler is called whenever a parser sees an opening
|
||||
tag. It is passed an element structure consisting of:
|
||||
|
||||
=over 4
|
||||
|
||||
=item LocalName
|
||||
|
||||
The name of the element minus any namespace prefix it may
|
||||
have come with in the document.
|
||||
|
||||
=item NamespaceURI
|
||||
|
||||
The URI of the namespace associated with this element,
|
||||
or the empty string for none.
|
||||
|
||||
=item Attributes
|
||||
|
||||
A set of attributes as described below.
|
||||
|
||||
=item Name
|
||||
|
||||
The name of the element as it was seen in the document (i.e.
|
||||
including any prefix associated with it)
|
||||
|
||||
=item Prefix
|
||||
|
||||
The prefix used to qualify this element's namespace, or the
|
||||
empty string if none.
|
||||
|
||||
=back
|
||||
|
||||
The B<Attributes> are a hash reference, keyed by what we have called
|
||||
"James Clark" notation. This means that the attribute name has been
|
||||
expanded to include any associated namespace URI, and put together as
|
||||
{ns}name, where "ns" is the expanded namespace URI of the attribute if
|
||||
and only if the attribute had a prefix, and "name" is the LocalName of
|
||||
the attribute.
|
||||
|
||||
The value of each entry in the attributes hash is another hash
|
||||
structure consisting of:
|
||||
|
||||
=over 4
|
||||
|
||||
=item LocalName
|
||||
|
||||
The name of the attribute minus any namespace prefix it may have
|
||||
come with in the document.
|
||||
|
||||
=item NamespaceURI
|
||||
|
||||
The URI of the namespace associated with this attribute. If the
|
||||
attribute had no prefix, then this consists of just the empty string.
|
||||
|
||||
=item Name
|
||||
|
||||
The attribute's name as it appeared in the document, including any
|
||||
namespace prefix.
|
||||
|
||||
=item Prefix
|
||||
|
||||
The prefix used to qualify this attribute's namepace, or the
|
||||
empty string if none.
|
||||
|
||||
=item Value
|
||||
|
||||
The value of the attribute.
|
||||
|
||||
=back
|
||||
|
||||
So a full example, as output by Data::Dumper might be:
|
||||
|
||||
....
|
||||
|
||||
=head2 end_element
|
||||
|
||||
The end_element handler is called either when a parser sees a closing
|
||||
tag, or after start_element has been called for an empty element (do
|
||||
note however that a parser may if it is so inclined call characters
|
||||
with an empty string when it sees an empty element. There is no simple
|
||||
way in SAX to determine if the parser in fact saw an empty element, a
|
||||
start and end element with no content..
|
||||
|
||||
The end_element handler receives exactly the same structure as
|
||||
start_element, minus the Attributes entry. One must note though that it
|
||||
should not be a reference to the same data as start_element receives,
|
||||
so you may change the values in start_element but this will not affect
|
||||
the values later seen by end_element.
|
||||
|
||||
=head2 characters
|
||||
|
||||
The characters callback may be called in serveral circumstances. The
|
||||
most obvious one is when seeing ordinary character data in the markup.
|
||||
But it is also called for text in a CDATA section, and is also called
|
||||
in other situations. A SAX parser has to make no guarantees whatsoever
|
||||
about how many times it may call characters for a stretch of text in an
|
||||
XML document - it may call once, or it may call once for every
|
||||
character in the text. In order to work around this it is often
|
||||
important for the SAX developer to use a bundling technique, where text
|
||||
is gathered up and processed in one of the other callbacks. This is not
|
||||
always necessary, but it is a worthwhile technique to learn, which we
|
||||
will cover in XML::SAX::Advanced (when I get around to writing it).
|
||||
|
||||
The characters handler is called with a very simple structure - a hash
|
||||
reference consisting of just one entry:
|
||||
|
||||
=over 4
|
||||
|
||||
=item Data
|
||||
|
||||
The text data that was received.
|
||||
|
||||
=back
|
||||
|
||||
=head2 comment
|
||||
|
||||
The comment callback is called for comment text. Unlike with
|
||||
C<characters()>, the comment callback *must* be invoked just once for an
|
||||
entire comment string. It receives a single simple structure - a hash
|
||||
reference containing just one entry:
|
||||
|
||||
=over 4
|
||||
|
||||
=item Data
|
||||
|
||||
The text of the comment.
|
||||
|
||||
=back
|
||||
|
||||
=head2 processing_instruction
|
||||
|
||||
The processing instruction handler is called for all processing
|
||||
instructions in the document. Note that these processing instructions
|
||||
may appear before the document root element, or after it, or anywhere
|
||||
where text and elements would normally appear within the document,
|
||||
according to the XML specification.
|
||||
|
||||
The handler is passed a structure containing just two entries:
|
||||
|
||||
=over 4
|
||||
|
||||
=item Target
|
||||
|
||||
The target of the processing instrcution
|
||||
|
||||
=item Data
|
||||
|
||||
The text data in the processing instruction. Can be an empty
|
||||
string for a processing instruction that has no data element.
|
||||
For example E<lt>?wiggle?E<gt> is a perfectly valid processing instruction.
|
||||
|
||||
=back
|
||||
|
||||
=head1 Tip of the iceberg
|
||||
|
||||
What we have discussed above is really the tip of the SAX iceberg. And
|
||||
so far it looks like there's not much of interest to SAX beyond what we
|
||||
have seen with XML::Parser. But it does go much further than that, I
|
||||
promise.
|
||||
|
||||
People who hate Object Oriented code for the sake of it may be thinking
|
||||
here that creating a new package just to parse something is a waste
|
||||
when they've been parsing things just fine up to now using procedural
|
||||
code. But there's reason to all this madness. And that reason is SAX
|
||||
Filters.
|
||||
|
||||
As you saw right at the very start, to let the parser know about our
|
||||
class, we pass it an instance of our class as the Handler to the
|
||||
parser. But now imagine what would happen if our class could also take
|
||||
a Handler option, and simply do some processing and pass on our data
|
||||
further down the line? That in a nutshell is how SAX filters work. It's
|
||||
Unix pipes for the 21st century!
|
||||
|
||||
There are two downsides to this. Number 1 - writing SAX filters can be
|
||||
tricky. If you look into the future and read the advanced tutorial I'm
|
||||
writing, you'll see that Handler can come in several shapes and sizes.
|
||||
So making sure your filter does the right thing can be tricky.
|
||||
Secondly, constructing complex filter chains can be difficult, and
|
||||
simple thinking tells us that we only get one pass at our document,
|
||||
when often we'll need more than that.
|
||||
|
||||
Luckily though, those downsides have been fixed by the release of two
|
||||
very cool modules. What's even better is that I didn't write either of
|
||||
them!
|
||||
|
||||
The first module is XML::SAX::Base. This is a VITAL SAX module that
|
||||
acts as a base class for all SAX parsers and filters. It provides an
|
||||
abstraction away from calling the handler methods, that makes sure your
|
||||
filter or parser does the right thing, and it does it FAST. So, if you
|
||||
ever need to write a SAX filter, which if you're processing XML -> XML,
|
||||
or XML -> HTML, then you probably do, then you need to be writing it as
|
||||
a subclass of XML::SAX::Base. Really - this is advice not to ignore
|
||||
lightly. I will not go into the details of writing a SAX filter here.
|
||||
Kip Hampton, the author of XML::SAX::Base has covered this nicely in
|
||||
his article on XML.com here <URI>.
|
||||
|
||||
To construct SAX pipelines, Barrie Slaymaker, a long time Perl hacker
|
||||
whose modules you will probably have heard of or used, wrote a very
|
||||
clever module called XML::SAX::Machines. This combines some really
|
||||
clever SAX filter-type modules, with a construction toolkit for filters
|
||||
that makes building pipelines easy. But before we see how it makes
|
||||
things easy, first lets see how tricky it looks to build complex SAX
|
||||
filter pipelines.
|
||||
|
||||
use XML::SAX::ParserFactory;
|
||||
use XML::Filter::Filter1;
|
||||
use XML::Filter::Filter2;
|
||||
use XML::SAX::Writer;
|
||||
|
||||
my $output_string;
|
||||
my $writer = XML::SAX::Writer->new(Output => \$output_string);
|
||||
my $filter2 = XML::SAX::Filter2->new(Handler => $writer);
|
||||
my $filter1 = XML::SAX::Filter1->new(Handler => $filter2);
|
||||
my $parser = XML::SAX::ParserFactory->parser(Handler => $filter1);
|
||||
|
||||
$parser->parse_uri("foo.xml");
|
||||
|
||||
This is a lot easier with XML::SAX::Machines:
|
||||
|
||||
use XML::SAX::Machines qw(Pipeline);
|
||||
|
||||
my $output_string;
|
||||
my $parser = Pipeline(
|
||||
XML::SAX::Filter1 => XML::SAX::Filter2 => \$output_string
|
||||
);
|
||||
|
||||
$parser->parse_uri("foo.xml");
|
||||
|
||||
One of the main benefits of XML::SAX::Machines is that the pipelines
|
||||
are constructed in natural order, rather than the reverse order we saw
|
||||
with manual pipeline construction. XML::SAX::Machines takes care of all
|
||||
the internals of pipe construction, providing you at the end with just
|
||||
a parser you can use (and you can re-use the same parser as many times
|
||||
as you need to).
|
||||
|
||||
Just a final tip. If you ever get stuck and are confused about what is
|
||||
being passed from one SAX filter or parser to the next, then
|
||||
Devel::TraceSAX will come to your rescue. This perl debugger plugin
|
||||
will allow you to dump the SAX stream of events as it goes by. Usage is
|
||||
really very simple just call your perl script that uses SAX as follows:
|
||||
|
||||
$ perl -d:TraceSAX <scriptname>
|
||||
|
||||
And preferably pipe the output to a pager of some sort, such as more or
|
||||
less. The output is extremely verbose, but should help clear some
|
||||
issues up.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Matt Sergeant, matt@sergeant.org
|
||||
|
||||
$Id$
|
||||
|
||||
=cut
|
||||
15
database/perl/vendor/lib/XML/SAX/ParserDetails.ini
vendored
Normal file
15
database/perl/vendor/lib/XML/SAX/ParserDetails.ini
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
[XML::SAX::PurePerl]
|
||||
http://xml.org/sax/features/namespaces = 1
|
||||
|
||||
[XML::SAX::Expat]
|
||||
http://xml.org/sax/features/external-general-entities = 1
|
||||
http://xml.org/sax/features/namespaces = 1
|
||||
http://xml.org/sax/features/external-parameter-entities = 1
|
||||
|
||||
[XML::LibXML::SAX::Parser]
|
||||
http://xml.org/sax/features/namespaces = 1
|
||||
|
||||
[XML::LibXML::SAX]
|
||||
http://xml.org/sax/features/namespaces = 1
|
||||
|
||||
|
||||
230
database/perl/vendor/lib/XML/SAX/ParserFactory.pm
vendored
Normal file
230
database/perl/vendor/lib/XML/SAX/ParserFactory.pm
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::ParserFactory;
|
||||
|
||||
use strict;
|
||||
use vars qw($VERSION);
|
||||
|
||||
$VERSION = '1.02';
|
||||
|
||||
use Symbol qw(gensym);
|
||||
use XML::SAX;
|
||||
use XML::SAX::Exception;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my %params = @_; # TODO : Fix this in spec.
|
||||
my $self = bless \%params, $class;
|
||||
$self->{KnownParsers} = XML::SAX->parsers();
|
||||
return $self;
|
||||
}
|
||||
|
||||
sub parser {
|
||||
my $self = shift;
|
||||
my @parser_params = @_;
|
||||
if (!ref($self)) {
|
||||
$self = $self->new();
|
||||
}
|
||||
|
||||
my $parser_class = $self->_parser_class();
|
||||
|
||||
my $version = '';
|
||||
if ($parser_class =~ s/\s*\(([\d\.]+)\)\s*$//) {
|
||||
$version = " $1";
|
||||
}
|
||||
|
||||
if (!$parser_class->can('new')) {
|
||||
eval "require $parser_class $version;";
|
||||
die $@ if $@;
|
||||
}
|
||||
|
||||
return $parser_class->new(@parser_params);
|
||||
}
|
||||
|
||||
sub require_feature {
|
||||
my $self = shift;
|
||||
my ($feature) = @_;
|
||||
$self->{RequiredFeatures}{$feature}++;
|
||||
return $self;
|
||||
}
|
||||
|
||||
sub _parser_class {
|
||||
my $self = shift;
|
||||
|
||||
# First try ParserPackage
|
||||
if ($XML::SAX::ParserPackage) {
|
||||
return $XML::SAX::ParserPackage;
|
||||
}
|
||||
|
||||
# Now check if required/preferred is there
|
||||
if ($self->{RequiredFeatures}) {
|
||||
my %required = %{$self->{RequiredFeatures}};
|
||||
# note - we never go onto the next try (ParserDetails.ini),
|
||||
# because if we can't provide the requested feature
|
||||
# we need to throw an exception.
|
||||
PARSER:
|
||||
foreach my $parser (reverse @{$self->{KnownParsers}}) {
|
||||
foreach my $feature (keys %required) {
|
||||
if (!exists $parser->{Features}{$feature}) {
|
||||
next PARSER;
|
||||
}
|
||||
}
|
||||
# got here - all features must exist!
|
||||
return $parser->{Name};
|
||||
}
|
||||
# TODO : should this be NotSupported() ?
|
||||
throw XML::SAX::Exception (
|
||||
Message => "Unable to provide required features",
|
||||
);
|
||||
}
|
||||
|
||||
# Next try SAX.ini
|
||||
for my $dir (@INC) {
|
||||
my $fh = gensym();
|
||||
if (open($fh, "$dir/SAX.ini")) {
|
||||
my $param_list = XML::SAX->_parse_ini_file($fh);
|
||||
my $params = $param_list->[0]->{Features};
|
||||
if ($params->{ParserPackage}) {
|
||||
return $params->{ParserPackage};
|
||||
}
|
||||
else {
|
||||
# we have required features (or nothing?)
|
||||
PARSER:
|
||||
foreach my $parser (reverse @{$self->{KnownParsers}}) {
|
||||
foreach my $feature (keys %$params) {
|
||||
if (!exists $parser->{Features}{$feature}) {
|
||||
next PARSER;
|
||||
}
|
||||
}
|
||||
return $parser->{Name};
|
||||
}
|
||||
XML::SAX->do_warn("Unable to provide SAX.ini required features. Using fallback\n");
|
||||
}
|
||||
last; # stop after first INI found
|
||||
}
|
||||
}
|
||||
|
||||
if (@{$self->{KnownParsers}}) {
|
||||
return $self->{KnownParsers}[-1]{Name};
|
||||
}
|
||||
else {
|
||||
return "XML::SAX::PurePerl"; # backup plan!
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::ParserFactory - Obtain a SAX parser
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::SAX::ParserFactory;
|
||||
use XML::SAX::XYZHandler;
|
||||
my $handler = XML::SAX::XYZHandler->new();
|
||||
my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
|
||||
$p->parse_uri("foo.xml");
|
||||
# or $p->parse_string("<foo/>") or $p->parse_file($fh);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
XML::SAX::ParserFactory is a factory class for providing an application
|
||||
with a Perl SAX2 XML parser. It is akin to DBI - a front end for other
|
||||
parser classes. Each new SAX2 parser installed will register itself
|
||||
with XML::SAX, and then it will become available to all applications
|
||||
that use XML::SAX::ParserFactory to obtain a SAX parser.
|
||||
|
||||
Unlike DBI however, XML/SAX parsers almost all work alike (especially
|
||||
if they subclass XML::SAX::Base, as they should), so rather than
|
||||
specifying the parser you want in the call to C<parser()>, XML::SAX
|
||||
has several ways to automatically choose which parser to use:
|
||||
|
||||
=over 4
|
||||
|
||||
=item * $XML::SAX::ParserPackage
|
||||
|
||||
If this package variable is set, then this package is C<require()>d
|
||||
and an instance of this package is returned by calling the C<new()>
|
||||
class method in that package. If it cannot be loaded or there is
|
||||
an error, an exception will be thrown. The variable can also contain
|
||||
a version number:
|
||||
|
||||
$XML::SAX::ParserPackage = "XML::SAX::Expat (0.72)";
|
||||
|
||||
And the number will be treated as a minimum version number.
|
||||
|
||||
=item * Required features
|
||||
|
||||
It is possible to require features from the parsers. For example, you
|
||||
may wish for a parser that supports validation via a DTD. To do that,
|
||||
use the following code:
|
||||
|
||||
use XML::SAX::ParserFactory;
|
||||
my $factory = XML::SAX::ParserFactory->new();
|
||||
$factory->require_feature('http://xml.org/sax/features/validation');
|
||||
my $parser = $factory->parser(...);
|
||||
|
||||
Alternatively, specify the required features in the call to the
|
||||
ParserFactory constructor:
|
||||
|
||||
my $factory = XML::SAX::ParserFactory->new(
|
||||
RequiredFeatures => {
|
||||
'http://xml.org/sax/features/validation' => 1,
|
||||
}
|
||||
);
|
||||
|
||||
If the features you have asked for are unavailable (for example the
|
||||
user might not have a validating parser installed), then an
|
||||
exception will be thrown.
|
||||
|
||||
The list of known parsers is searched in reverse order, so it will
|
||||
always return the last installed parser that supports all of your
|
||||
requested features (Note: this is subject to change if someone
|
||||
comes up with a better way of making this work).
|
||||
|
||||
=item * SAX.ini
|
||||
|
||||
ParserFactory will search @INC for a file called SAX.ini, which
|
||||
is in a simple format:
|
||||
|
||||
# a comment looks like this,
|
||||
; or like this, and are stripped anywhere in the file
|
||||
key = value # SAX.in contains key/value pairs.
|
||||
|
||||
All whitespace is non-significant.
|
||||
|
||||
This file can contain either a line:
|
||||
|
||||
ParserPackage = MyParserModule (1.02)
|
||||
|
||||
Where MyParserModule is the module to load and use for the parser,
|
||||
and the number in brackets is a minimum version to load.
|
||||
|
||||
Or you can list required features:
|
||||
|
||||
http://xml.org/sax/features/validation = 1
|
||||
|
||||
And each feature with a true value will be required.
|
||||
|
||||
=item * Fallback
|
||||
|
||||
If none of the above works, the last parser installed on the user's
|
||||
system will be used. The XML::SAX package ships with a pure perl
|
||||
XML parser, XML::SAX::PurePerl, so that there will always be a
|
||||
fallback parser.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Matt Sergeant, matt@sergeant.org
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
This is free software, you may use it and distribute it under the same
|
||||
terms as Perl itself.
|
||||
|
||||
=cut
|
||||
|
||||
751
database/perl/vendor/lib/XML/SAX/PurePerl.pm
vendored
Normal file
751
database/perl/vendor/lib/XML/SAX/PurePerl.pm
vendored
Normal file
@@ -0,0 +1,751 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
|
||||
use strict;
|
||||
use vars qw/$VERSION/;
|
||||
|
||||
$VERSION = '1.02';
|
||||
|
||||
use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar);
|
||||
use XML::SAX::PurePerl::Reader;
|
||||
use XML::SAX::PurePerl::EncodingDetect ();
|
||||
use XML::SAX::Exception;
|
||||
use XML::SAX::PurePerl::DocType ();
|
||||
use XML::SAX::PurePerl::DTDDecls ();
|
||||
use XML::SAX::PurePerl::XMLDecl ();
|
||||
use XML::SAX::DocumentLocator ();
|
||||
use XML::SAX::Base ();
|
||||
use XML::SAX qw(Namespaces);
|
||||
use XML::NamespaceSupport ();
|
||||
use IO::File;
|
||||
|
||||
if ($] < 5.006) {
|
||||
require XML::SAX::PurePerl::NoUnicodeExt;
|
||||
}
|
||||
else {
|
||||
require XML::SAX::PurePerl::UnicodeExt;
|
||||
}
|
||||
|
||||
use vars qw(@ISA);
|
||||
@ISA = ('XML::SAX::Base');
|
||||
|
||||
my %int_ents = (
|
||||
amp => '&',
|
||||
lt => '<',
|
||||
gt => '>',
|
||||
quot => '"',
|
||||
apos => "'",
|
||||
);
|
||||
|
||||
my $xmlns_ns = "http://www.w3.org/2000/xmlns/";
|
||||
my $xml_ns = "http://www.w3.org/XML/1998/namespace";
|
||||
|
||||
use Carp;
|
||||
sub _parse_characterstream {
|
||||
my $self = shift;
|
||||
my ($fh) = @_;
|
||||
confess("CharacterStream is not yet correctly implemented");
|
||||
my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
|
||||
return $self->_parse($reader);
|
||||
}
|
||||
|
||||
sub _parse_bytestream {
|
||||
my $self = shift;
|
||||
my ($fh) = @_;
|
||||
my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
|
||||
return $self->_parse($reader);
|
||||
}
|
||||
|
||||
sub _parse_string {
|
||||
my $self = shift;
|
||||
my ($str) = @_;
|
||||
my $reader = XML::SAX::PurePerl::Reader::String->new($str);
|
||||
return $self->_parse($reader);
|
||||
}
|
||||
|
||||
sub _parse_systemid {
|
||||
my $self = shift;
|
||||
my ($uri) = @_;
|
||||
my $reader = XML::SAX::PurePerl::Reader::URI->new($uri);
|
||||
return $self->_parse($reader);
|
||||
}
|
||||
|
||||
sub _parse {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
$reader->public_id($self->{ParseOptions}{Source}{PublicId});
|
||||
$reader->system_id($self->{ParseOptions}{Source}{SystemId});
|
||||
|
||||
$self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
|
||||
|
||||
$self->set_document_locator(
|
||||
XML::SAX::DocumentLocator->new(
|
||||
sub { $reader->public_id },
|
||||
sub { $reader->system_id },
|
||||
sub { $reader->line },
|
||||
sub { $reader->column },
|
||||
sub { $reader->get_encoding },
|
||||
sub { $reader->get_xml_version },
|
||||
),
|
||||
);
|
||||
|
||||
$self->start_document({});
|
||||
|
||||
if (defined $self->{ParseOptions}{Source}{Encoding}) {
|
||||
$reader->set_encoding($self->{ParseOptions}{Source}{Encoding});
|
||||
}
|
||||
else {
|
||||
$self->encoding_detect($reader);
|
||||
}
|
||||
|
||||
# parse a document
|
||||
$self->document($reader);
|
||||
|
||||
return $self->end_document({});
|
||||
}
|
||||
|
||||
sub parser_error {
|
||||
my $self = shift;
|
||||
my ($error, $reader) = @_;
|
||||
|
||||
# warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n");
|
||||
my $exception = XML::SAX::Exception::Parse->new(
|
||||
Message => $error,
|
||||
ColumnNumber => $reader->column,
|
||||
LineNumber => $reader->line,
|
||||
PublicId => $reader->public_id,
|
||||
SystemId => $reader->system_id,
|
||||
);
|
||||
|
||||
$self->fatal_error($exception);
|
||||
$exception->throw;
|
||||
}
|
||||
|
||||
sub document {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
# document ::= prolog element Misc*
|
||||
|
||||
$self->prolog($reader);
|
||||
$self->element($reader) ||
|
||||
$self->parser_error("Document requires an element", $reader);
|
||||
|
||||
while(length($reader->data)) {
|
||||
$self->Misc($reader) ||
|
||||
$self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader);
|
||||
}
|
||||
}
|
||||
|
||||
sub prolog {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
$self->XMLDecl($reader);
|
||||
|
||||
# consume all misc bits
|
||||
1 while($self->Misc($reader));
|
||||
|
||||
if ($self->doctypedecl($reader)) {
|
||||
while (length($reader->data)) {
|
||||
$self->Misc($reader) || last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub element {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return 0 unless $reader->match('<');
|
||||
|
||||
my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader);
|
||||
|
||||
my %attribs;
|
||||
|
||||
while( my ($k, $v) = $self->Attribute($reader) ) {
|
||||
$attribs{$k} = $v;
|
||||
}
|
||||
|
||||
my $have_namespaces = $self->get_feature(Namespaces);
|
||||
|
||||
# Namespace processing
|
||||
$self->{NSHelper}->push_context;
|
||||
my @new_ns;
|
||||
# my %attrs = @attribs;
|
||||
# while (my ($k,$v) = each %attrs) {
|
||||
if ($have_namespaces) {
|
||||
while ( my ($k, $v) = each %attribs ) {
|
||||
if ($k =~ m/^xmlns(:(.*))?$/) {
|
||||
my $prefix = $2 || '';
|
||||
$self->{NSHelper}->declare_prefix($prefix, $v);
|
||||
my $ns =
|
||||
{
|
||||
Prefix => $prefix,
|
||||
NamespaceURI => $v,
|
||||
};
|
||||
push @new_ns, $ns;
|
||||
$self->SUPER::start_prefix_mapping($ns);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Create element object and fire event
|
||||
my %attrib_hash;
|
||||
while (my ($name, $value) = each %attribs ) {
|
||||
# TODO normalise value here
|
||||
my ($ns, $prefix, $lname);
|
||||
if ($have_namespaces) {
|
||||
($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name);
|
||||
}
|
||||
$ns ||= ''; $prefix ||= ''; $lname ||= '';
|
||||
$attrib_hash{"{$ns}$lname"} = {
|
||||
Name => $name,
|
||||
LocalName => $lname,
|
||||
Prefix => $prefix,
|
||||
NamespaceURI => $ns,
|
||||
Value => $value,
|
||||
};
|
||||
}
|
||||
|
||||
%attribs = (); # lose the memory since we recurse deep
|
||||
|
||||
my ($ns, $prefix, $lname);
|
||||
if ($self->get_feature(Namespaces)) {
|
||||
($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name);
|
||||
}
|
||||
else {
|
||||
$lname = $name;
|
||||
}
|
||||
$ns ||= ''; $prefix ||= ''; $lname ||= '';
|
||||
|
||||
# Process remainder of start_element
|
||||
$self->skip_whitespace($reader);
|
||||
my $have_content;
|
||||
my $data = $reader->data(2);
|
||||
if ($data =~ /^\/>/) {
|
||||
$reader->move_along(2);
|
||||
}
|
||||
else {
|
||||
$data =~ /^>/ or $self->parser_error("No close element tag", $reader);
|
||||
$reader->move_along(1);
|
||||
$have_content++;
|
||||
}
|
||||
|
||||
my $el =
|
||||
{
|
||||
Name => $name,
|
||||
LocalName => $lname,
|
||||
Prefix => $prefix,
|
||||
NamespaceURI => $ns,
|
||||
Attributes => \%attrib_hash,
|
||||
};
|
||||
$self->start_element($el);
|
||||
|
||||
# warn("($name\n");
|
||||
|
||||
if ($have_content) {
|
||||
$self->content($reader);
|
||||
|
||||
my $data = $reader->data(2);
|
||||
$data =~ /^<\// or $self->parser_error("No close tag marker", $reader);
|
||||
$reader->move_along(2);
|
||||
my $end_name = $self->Name($reader);
|
||||
$end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader);
|
||||
$self->skip_whitespace($reader);
|
||||
$reader->match('>') or $self->parser_error("No close '>' on end tag", $reader);
|
||||
}
|
||||
|
||||
my %end_el = %$el;
|
||||
delete $end_el{Attributes};
|
||||
$self->end_element(\%end_el);
|
||||
|
||||
for my $ns (@new_ns) {
|
||||
$self->end_prefix_mapping($ns);
|
||||
}
|
||||
$self->{NSHelper}->pop_context;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub content {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
while (1) {
|
||||
$self->CharData($reader);
|
||||
|
||||
my $data = $reader->data(2);
|
||||
|
||||
if ($data =~ /^<\//) {
|
||||
return 1;
|
||||
}
|
||||
elsif ($data =~ /^&/) {
|
||||
$self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader);
|
||||
next;
|
||||
}
|
||||
elsif ($data =~ /^<!/) {
|
||||
($self->CDSect($reader)
|
||||
or
|
||||
$self->Comment($reader))
|
||||
and next;
|
||||
}
|
||||
elsif ($data =~ /^<\?/) {
|
||||
$self->PI($reader) and next;
|
||||
}
|
||||
elsif ($data =~ /^</) {
|
||||
$self->element($reader) and next;
|
||||
}
|
||||
last;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub CDSect {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(9);
|
||||
return 0 unless $data =~ /^<!\[CDATA\[/;
|
||||
$reader->move_along(9);
|
||||
|
||||
$self->start_cdata({});
|
||||
|
||||
$data = $reader->data;
|
||||
while (1) {
|
||||
$self->parser_error("EOF looking for CDATA section end", $reader)
|
||||
unless length($data);
|
||||
|
||||
if ($data =~ /^(.*?)\]\]>/s) {
|
||||
my $chars = $1;
|
||||
$reader->move_along(length($chars) + 3);
|
||||
$self->characters({Data => $chars});
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$self->characters({Data => $data});
|
||||
$reader->move_along(length($data));
|
||||
$data = $reader->data;
|
||||
}
|
||||
}
|
||||
$self->end_cdata({});
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub CharData {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data;
|
||||
|
||||
while (1) {
|
||||
return unless length($data);
|
||||
|
||||
if ($data =~ /^([^<&]*)[<&]/s) {
|
||||
my $chars = $1;
|
||||
$self->parser_error("String ']]>' not allowed in character data", $reader)
|
||||
if $chars =~ /\]\]>/;
|
||||
$reader->move_along(length($chars));
|
||||
$self->characters({Data => $chars}) if length($chars);
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$self->characters({Data => $data});
|
||||
$reader->move_along(length($data));
|
||||
$data = $reader->data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub Misc {
|
||||
my ($self, $reader) = @_;
|
||||
if ($self->Comment($reader)) {
|
||||
return 1;
|
||||
}
|
||||
elsif ($self->PI($reader)) {
|
||||
return 1;
|
||||
}
|
||||
elsif ($self->skip_whitespace($reader)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub Reference {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return 0 unless $reader->match('&');
|
||||
|
||||
my $data = $reader->data;
|
||||
|
||||
# Fetch more data if we have an incomplete numeric reference
|
||||
if ($data =~ /^(#\d*|#x[0-9a-fA-F]*)$/) {
|
||||
$data = $reader->data(length($data) + 6);
|
||||
}
|
||||
|
||||
if ($data =~ /^#x([0-9a-fA-F]+);/) {
|
||||
my $ref = $1;
|
||||
$reader->move_along(length($ref) + 3);
|
||||
my $char = chr_ref(hex($ref));
|
||||
$self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
|
||||
unless $char =~ /$SingleChar/o;
|
||||
$self->characters({ Data => $char });
|
||||
return 1;
|
||||
}
|
||||
elsif ($data =~ /^#([0-9]+);/) {
|
||||
my $ref = $1;
|
||||
$reader->move_along(length($ref) + 2);
|
||||
my $char = chr_ref($ref);
|
||||
$self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
|
||||
unless $char =~ /$SingleChar/o;
|
||||
$self->characters({ Data => $char });
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
# EntityRef
|
||||
my $name = $self->Name($reader)
|
||||
|| $self->parser_error("Invalid name in entity", $reader);
|
||||
$reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader);
|
||||
|
||||
# warn("got entity: \&$name;\n");
|
||||
|
||||
# expand it
|
||||
if ($self->_is_entity($name)) {
|
||||
|
||||
if ($self->_is_external($name)) {
|
||||
my $value = $self->_get_entity($name);
|
||||
my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value);
|
||||
$self->encoding_detect($ent_reader);
|
||||
$self->extParsedEnt($ent_reader);
|
||||
}
|
||||
else {
|
||||
my $value = $self->_stringify_entity($name);
|
||||
my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value);
|
||||
$self->content($ent_reader);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) {
|
||||
$self->characters({ Data => $int_ents{$name} });
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
$self->parser_error("Undeclared entity", $reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub AttReference {
|
||||
my ($self, $name, $reader) = @_;
|
||||
if ($name =~ /^#x([0-9a-fA-F]+)$/) {
|
||||
my $chr = chr_ref(hex($1));
|
||||
$chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
|
||||
return $chr;
|
||||
}
|
||||
elsif ($name =~ /^#([0-9]+)$/) {
|
||||
my $chr = chr_ref($1);
|
||||
$chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
|
||||
return $chr;
|
||||
}
|
||||
else {
|
||||
if ($self->_is_entity($name)) {
|
||||
if ($self->_is_external($name)) {
|
||||
$self->parser_error("No external entity references allowed in attribute values", $reader);
|
||||
}
|
||||
else {
|
||||
my $value = $self->_stringify_entity($name);
|
||||
return $value;
|
||||
}
|
||||
}
|
||||
elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) {
|
||||
return $int_ents{$name};
|
||||
}
|
||||
else {
|
||||
$self->parser_error("Undeclared entity '$name'", $reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub extParsedEnt {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
$self->TextDecl($reader);
|
||||
$self->content($reader);
|
||||
}
|
||||
|
||||
sub _is_external {
|
||||
my ($self, $name) = @_;
|
||||
# TODO: Fix this to use $reader to store the entities perhaps.
|
||||
if ($self->{ParseOptions}{external_entities}{$name}) {
|
||||
return 1;
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
||||
sub _is_entity {
|
||||
my ($self, $name) = @_;
|
||||
# TODO: ditto above
|
||||
if (exists $self->{ParseOptions}{entities}{$name}) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _stringify_entity {
|
||||
my ($self, $name) = @_;
|
||||
# TODO: ditto above
|
||||
if (exists $self->{ParseOptions}{expanded_entity}{$name}) {
|
||||
return $self->{ParseOptions}{expanded_entity}{$name};
|
||||
}
|
||||
# expand
|
||||
my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name});
|
||||
my $ent = '';
|
||||
while(1) {
|
||||
my $data = $reader->data;
|
||||
$ent .= $data;
|
||||
$reader->move_along(length($data)) or last;
|
||||
}
|
||||
return $self->{ParseOptions}{expanded_entity}{$name} = $ent;
|
||||
}
|
||||
|
||||
sub _get_entity {
|
||||
my ($self, $name) = @_;
|
||||
# TODO: ditto above
|
||||
return $self->{ParseOptions}{entities}{$name};
|
||||
}
|
||||
|
||||
sub skip_whitespace {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data;
|
||||
|
||||
my $found = 0;
|
||||
while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) {
|
||||
last unless length($1);
|
||||
$found++;
|
||||
$reader->move_along(length($1));
|
||||
$data = $reader->data;
|
||||
}
|
||||
|
||||
return $found;
|
||||
}
|
||||
|
||||
sub Attribute {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
$self->skip_whitespace($reader) || return;
|
||||
|
||||
my $data = $reader->data(2);
|
||||
return if $data =~ /^\/?>/;
|
||||
|
||||
if (my $name = $self->Name($reader)) {
|
||||
$self->skip_whitespace($reader);
|
||||
$reader->match('=') or $self->parser_error("No '=' in Attribute", $reader);
|
||||
$self->skip_whitespace($reader);
|
||||
my $value = $self->AttValue($reader);
|
||||
|
||||
if (!$self->cdata_attrib($name)) {
|
||||
$value =~ s/^\x20*//; # discard leading spaces
|
||||
$value =~ s/\x20*$//; # discard trailing spaces
|
||||
$value =~ s/ {1,}/ /g; # all >1 space to single space
|
||||
}
|
||||
|
||||
return $name, $value;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
sub cdata_attrib {
|
||||
# TODO implement this!
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub AttValue {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $quote = $self->quote($reader);
|
||||
|
||||
my $value = '';
|
||||
|
||||
while (1) {
|
||||
my $data = $reader->data;
|
||||
$self->parser_error("EOF found while looking for the end of attribute value", $reader)
|
||||
unless length($data);
|
||||
if ($data =~ /^([^$quote]*)$quote/) {
|
||||
$reader->move_along(length($1) + 1);
|
||||
$value .= $1;
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$value .= $data;
|
||||
$reader->move_along(length($data));
|
||||
}
|
||||
}
|
||||
|
||||
if ($value =~ /</) {
|
||||
$self->parser_error("< character not allowed in attribute values", $reader);
|
||||
}
|
||||
|
||||
$value =~ s/[\x09\x0A\x0D]/\x20/g;
|
||||
$value =~ s/&(#(x[0-9a-fA-F]+)|#([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo;
|
||||
|
||||
return $value;
|
||||
}
|
||||
|
||||
sub Comment {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(4);
|
||||
if ($data =~ /^<!--/) {
|
||||
$reader->move_along(4);
|
||||
my $comment_str = '';
|
||||
while (1) {
|
||||
my $data = $reader->data;
|
||||
$self->parser_error("End of data seen while looking for close comment marker", $reader)
|
||||
unless length($data);
|
||||
if ($data =~ /^(.*?)-->/s) {
|
||||
$comment_str .= $1;
|
||||
$self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/;
|
||||
$reader->move_along(length($1) + 3);
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$comment_str .= $data;
|
||||
$reader->move_along(length($data));
|
||||
}
|
||||
}
|
||||
|
||||
$self->comment({ Data => $comment_str });
|
||||
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub PI {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(2);
|
||||
|
||||
if ($data =~ /^<\?/) {
|
||||
$reader->move_along(2);
|
||||
my ($target);
|
||||
$target = $self->Name($reader) ||
|
||||
$self->parser_error("PI has no target", $reader);
|
||||
|
||||
my $pi_data = '';
|
||||
if ($self->skip_whitespace($reader)) {
|
||||
while (1) {
|
||||
my $data = $reader->data;
|
||||
$self->parser_error("End of data seen while looking for close PI marker", $reader)
|
||||
unless length($data);
|
||||
if ($data =~ /^(.*?)\?>/s) {
|
||||
$pi_data .= $1;
|
||||
$reader->move_along(length($1) + 2);
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$pi_data .= $data;
|
||||
$reader->move_along(length($data));
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
my $data = $reader->data(2);
|
||||
$data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader);
|
||||
$reader->move_along(2);
|
||||
}
|
||||
|
||||
$self->processing_instruction({ Target => $target, Data => $pi_data });
|
||||
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub Name {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $name = '';
|
||||
while(1) {
|
||||
my $data = $reader->data;
|
||||
return unless length($data);
|
||||
$data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*\|]*)/ or return;
|
||||
$name .= $1;
|
||||
my $len = length($1);
|
||||
$reader->move_along($len);
|
||||
last if ($len != length($data));
|
||||
}
|
||||
|
||||
return unless length($name);
|
||||
|
||||
$name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
|
||||
|
||||
return $name;
|
||||
}
|
||||
|
||||
sub quote {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data;
|
||||
|
||||
$data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader);
|
||||
$reader->move_along(1);
|
||||
return $1;
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use XML::Handler::Foo;
|
||||
use XML::SAX::PurePerl;
|
||||
my $handler = XML::Handler::Foo->new();
|
||||
my $parser = XML::SAX::PurePerl->new(Handler => $handler);
|
||||
$parser->parse_uri("myfile.xml");
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module implements an XML parser in pure perl. It is written around the
|
||||
upcoming perl 5.8's unicode support and support for multiple document
|
||||
encodings (using the PerlIO layer), however it has been ported to work with
|
||||
ASCII/UTF8 documents under lower perl versions.
|
||||
|
||||
The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in
|
||||
the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a
|
||||
better location soon.
|
||||
|
||||
Please refer to the SAX2 documentation for how to use this module - it is merely a
|
||||
front end to SAX2, and implements nothing that is not in that spec (or at least tries
|
||||
not to - please email me if you find errors in this implementation).
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else
|
||||
in fact. However it is great as a fallback parser for XML::SAX, where the
|
||||
user might not be able to install an XS based parser or C library.
|
||||
|
||||
Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations,
|
||||
though the code is in place to start doing this. Also parsing parameter entity
|
||||
references is causing me much confusion, since it's not exactly what I would call
|
||||
trivial, or well documented in the XML grammar. XML documents with internal subsets
|
||||
are likely to fail.
|
||||
|
||||
I am however trying to work towards full conformance using the Oasis test suite.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Matt Sergeant, matt@sergeant.org. Copyright 2001.
|
||||
|
||||
Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com.
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
This is free software. You may use it or redistribute it under the same terms as
|
||||
Perl 5.7.2 itself.
|
||||
|
||||
=cut
|
||||
|
||||
603
database/perl/vendor/lib/XML/SAX/PurePerl/DTDDecls.pm
vendored
Normal file
603
database/perl/vendor/lib/XML/SAX/PurePerl/DTDDecls.pm
vendored
Normal file
@@ -0,0 +1,603 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
|
||||
use strict;
|
||||
use XML::SAX::PurePerl::Productions qw($SingleChar);
|
||||
|
||||
sub elementdecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(9);
|
||||
return 0 unless $data =~ /^<!ELEMENT/;
|
||||
$reader->move_along(9);
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after ELEMENT declaration", $reader);
|
||||
|
||||
my $name = $self->Name($reader);
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after ELEMENT's name", $reader);
|
||||
|
||||
$self->contentspec($reader, $name);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$reader->match('>') or $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub contentspec {
|
||||
my ($self, $reader, $name) = @_;
|
||||
|
||||
my $data = $reader->data(5);
|
||||
|
||||
my $model;
|
||||
if ($data =~ /^EMPTY/) {
|
||||
$reader->move_along(5);
|
||||
$model = 'EMPTY';
|
||||
}
|
||||
elsif ($data =~ /^ANY/) {
|
||||
$reader->move_along(3);
|
||||
$model = 'ANY';
|
||||
}
|
||||
else {
|
||||
$model = $self->Mixed_or_children($reader);
|
||||
}
|
||||
|
||||
if ($model) {
|
||||
# call SAX callback now.
|
||||
$self->element_decl({Name => $name, Model => $model});
|
||||
return 1;
|
||||
}
|
||||
|
||||
$self->parser_error("contentspec not found in ELEMENT declaration", $reader);
|
||||
}
|
||||
|
||||
sub Mixed_or_children {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(8);
|
||||
$data =~ /^\(/ or return; # $self->parser_error("No opening bracket in Mixed or children", $reader);
|
||||
|
||||
if ($data =~ /^\(\s*\#PCDATA/) {
|
||||
$reader->match('(');
|
||||
$self->skip_whitespace($reader);
|
||||
$reader->move_along(7);
|
||||
my $model = $self->Mixed($reader);
|
||||
return $model;
|
||||
}
|
||||
|
||||
# not matched - must be Children
|
||||
return $self->children($reader);
|
||||
}
|
||||
|
||||
# Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' )
|
||||
# | ( '(' S* PCDATA S* ')' )
|
||||
sub Mixed {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
# Mixed_or_children already matched '(' S* '#PCDATA'
|
||||
|
||||
my $model = '(#PCDATA';
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
my %seen;
|
||||
|
||||
while (1) {
|
||||
last unless $reader->match('|');
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
my $name = $self->Name($reader) ||
|
||||
$self->parser_error("No 'Name' after Mixed content '|'", $reader);
|
||||
|
||||
if ($seen{$name}) {
|
||||
$self->parser_error("Element '$name' has already appeared in this group", $reader);
|
||||
}
|
||||
$seen{$name}++;
|
||||
|
||||
$model .= "|$name";
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
}
|
||||
|
||||
$reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader);
|
||||
|
||||
$model .= ")";
|
||||
|
||||
if ($reader->match('*')) {
|
||||
$model .= "*";
|
||||
}
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
# [[47]] Children ::= ChoiceOrSeq Cardinality?
|
||||
# [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality?
|
||||
# ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')'
|
||||
# [[49]] Choice ::= ( S* '|' S* Cp )+
|
||||
# [[50]] Seq ::= ( S* ',' S* Cp )+
|
||||
# // Children ::= (Choice | Seq) Cardinality?
|
||||
# // Cp ::= ( QName | Choice | Seq) Cardinality?
|
||||
# // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')'
|
||||
# // Seq ::= '(' S* Cp ( S* ',' S* Cp )* S* ')'
|
||||
# [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality )
|
||||
# | ( '(' S* PCDATA S* ')' )
|
||||
# Cardinality ::= '?' | '+' | '*'
|
||||
# MixedCardinality ::= '*'
|
||||
sub children {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
|
||||
}
|
||||
|
||||
sub ChoiceOrSeq {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
$reader->match('(') or $self->parser_error("choice/seq contains no opening bracket", $reader);
|
||||
|
||||
my $model = '(';
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$model .= $self->Cp($reader);
|
||||
|
||||
if (my $choice = $self->Choice($reader)) {
|
||||
$model .= $choice;
|
||||
}
|
||||
else {
|
||||
$model .= $self->Seq($reader);
|
||||
}
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$reader->match(')') or $self->parser_error("choice/seq contains no closing bracket", $reader);
|
||||
|
||||
$model .= ')';
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
sub Cardinality {
|
||||
my ($self, $reader) = @_;
|
||||
# cardinality is always optional
|
||||
my $data = $reader->data;
|
||||
if ($data =~ /^([\?\+\*])/) {
|
||||
$reader->move_along(1);
|
||||
return $1;
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
sub Cp {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $model;
|
||||
my $name = eval
|
||||
{
|
||||
if (my $name = $self->Name($reader)) {
|
||||
return $name . $self->Cardinality($reader);
|
||||
}
|
||||
};
|
||||
return $name if defined $name;
|
||||
return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
|
||||
}
|
||||
|
||||
sub Choice {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $model = '';
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
while ($reader->match('|')) {
|
||||
$self->skip_whitespace($reader);
|
||||
$model .= '|';
|
||||
$model .= $self->Cp($reader);
|
||||
$self->skip_whitespace($reader);
|
||||
}
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
sub Seq {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $model = '';
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
while ($reader->match(',')) {
|
||||
$self->skip_whitespace($reader);
|
||||
my $cp = $self->Cp($reader);
|
||||
if ($cp) {
|
||||
$model .= ',';
|
||||
$model .= $cp;
|
||||
}
|
||||
$self->skip_whitespace($reader);
|
||||
}
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
sub AttlistDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(9);
|
||||
if ($data =~ /^<!ATTLIST/) {
|
||||
# It's an attlist
|
||||
|
||||
$reader->move_along(9);
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after ATTLIST declaration", $reader);
|
||||
my $name = $self->Name($reader);
|
||||
|
||||
$self->AttDefList($reader, $name);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$reader->match('>') or $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub AttDefList {
|
||||
my ($self, $reader, $name) = @_;
|
||||
|
||||
1 while $self->AttDef($reader, $name);
|
||||
}
|
||||
|
||||
sub AttDef {
|
||||
my ($self, $reader, $el_name) = @_;
|
||||
|
||||
$self->skip_whitespace($reader) || return 0;
|
||||
my $att_name = $self->Name($reader) || return 0;
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after Name in attribute definition", $reader);
|
||||
my $att_type = $self->AttType($reader);
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after AttType in attribute definition", $reader);
|
||||
my ($mode, $value) = $self->DefaultDecl($reader);
|
||||
|
||||
# fire SAX event here!
|
||||
$self->attribute_decl({
|
||||
eName => $el_name,
|
||||
aName => $att_name,
|
||||
Type => $att_type,
|
||||
Mode => $mode,
|
||||
Value => $value,
|
||||
});
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub AttType {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return $self->StringType($reader) ||
|
||||
$self->TokenizedType($reader) ||
|
||||
$self->EnumeratedType($reader) ||
|
||||
$self->parser_error("Can't match AttType", $reader);
|
||||
}
|
||||
|
||||
sub StringType {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(5);
|
||||
return unless $data =~ /^CDATA/;
|
||||
$reader->move_along(5);
|
||||
return 'CDATA';
|
||||
}
|
||||
|
||||
sub TokenizedType {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(8);
|
||||
if ($data =~ /^(IDREFS?|ID|ENTITIES|ENTITY|NMTOKENS?)/) {
|
||||
$reader->move_along(length($1));
|
||||
return $1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
sub EnumeratedType {
|
||||
my ($self, $reader) = @_;
|
||||
return $self->NotationType($reader) || $self->Enumeration($reader);
|
||||
}
|
||||
|
||||
sub NotationType {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(8);
|
||||
return unless $data =~ /^NOTATION/;
|
||||
$reader->move_along(8);
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after NOTATION", $reader);
|
||||
$reader->match('(') or $self->parser_error("No opening bracket in notation section", $reader);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
my $model = 'NOTATION (';
|
||||
my $name = $self->Name($reader) ||
|
||||
$self->parser_error("No name in notation section", $reader);
|
||||
$model .= $name;
|
||||
$self->skip_whitespace($reader);
|
||||
$data = $reader->data;
|
||||
while ($data =~ /^\|/) {
|
||||
$reader->move_along(1);
|
||||
$model .= '|';
|
||||
$self->skip_whitespace($reader);
|
||||
my $name = $self->Name($reader) ||
|
||||
$self->parser_error("No name in notation section", $reader);
|
||||
$model .= $name;
|
||||
$self->skip_whitespace($reader);
|
||||
$data = $reader->data;
|
||||
}
|
||||
$data =~ /^\)/ or $self->parser_error("No closing bracket in notation section", $reader);
|
||||
$reader->move_along(1);
|
||||
|
||||
$model .= ')';
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
sub Enumeration {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return unless $reader->match('(');
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
my $model = '(';
|
||||
my $nmtoken = $self->Nmtoken($reader) ||
|
||||
$self->parser_error("No Nmtoken in enumerated declaration", $reader);
|
||||
$model .= $nmtoken;
|
||||
$self->skip_whitespace($reader);
|
||||
my $data = $reader->data;
|
||||
while ($data =~ /^\|/) {
|
||||
$model .= '|';
|
||||
$reader->move_along(1);
|
||||
$self->skip_whitespace($reader);
|
||||
my $nmtoken = $self->Nmtoken($reader) ||
|
||||
$self->parser_error("No Nmtoken in enumerated declaration", $reader);
|
||||
$model .= $nmtoken;
|
||||
$self->skip_whitespace($reader);
|
||||
$data = $reader->data;
|
||||
}
|
||||
$data =~ /^\)/ or $self->parser_error("No closing bracket in enumerated declaration", $reader);
|
||||
$reader->move_along(1);
|
||||
|
||||
$model .= ')';
|
||||
|
||||
return $model;
|
||||
}
|
||||
|
||||
sub Nmtoken {
|
||||
my ($self, $reader) = @_;
|
||||
return $self->Name($reader);
|
||||
}
|
||||
|
||||
sub DefaultDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(9);
|
||||
if ($data =~ /^(\#REQUIRED|\#IMPLIED)/) {
|
||||
$reader->move_along(length($1));
|
||||
return $1;
|
||||
}
|
||||
my $model = '';
|
||||
if ($data =~ /^\#FIXED/) {
|
||||
$reader->move_along(6);
|
||||
$self->skip_whitespace($reader) || $self->parser_error(
|
||||
"no whitespace after FIXED specifier", $reader);
|
||||
my $value = $self->AttValue($reader);
|
||||
return "#FIXED", $value;
|
||||
}
|
||||
my $value = $self->AttValue($reader);
|
||||
return undef, $value;
|
||||
}
|
||||
|
||||
sub EntityDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(8);
|
||||
return 0 unless $data =~ /^<!ENTITY/;
|
||||
$reader->move_along(8);
|
||||
|
||||
$self->skip_whitespace($reader) || $self->parser_error(
|
||||
"No whitespace after ENTITY declaration", $reader);
|
||||
|
||||
$self->PEDecl($reader) || $self->GEDecl($reader);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$reader->match('>') or $self->parser_error("No closing '>' in entity definition", $reader);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub GEDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader);
|
||||
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader);
|
||||
|
||||
# TODO: ExternalID calls lexhandler method. Wrong place for it.
|
||||
my $value;
|
||||
if ($value = $self->ExternalID($reader)) {
|
||||
$value .= $self->NDataDecl($reader);
|
||||
}
|
||||
else {
|
||||
$value = $self->EntityValue($reader);
|
||||
}
|
||||
|
||||
if ($self->{ParseOptions}{entities}{$name}) {
|
||||
warn("entity $name already exists\n");
|
||||
} else {
|
||||
$self->{ParseOptions}{entities}{$name} = 1;
|
||||
$self->{ParseOptions}{expanded_entity}{$name} = $value; # ???
|
||||
}
|
||||
# do callback?
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub PEDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return 0 unless $reader->match('%');
|
||||
|
||||
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader);
|
||||
my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader);
|
||||
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader);
|
||||
my $value = $self->ExternalID($reader) ||
|
||||
$self->EntityValue($reader) ||
|
||||
$self->parser_error("PE is not a value or an external resource", $reader);
|
||||
# do callback?
|
||||
return 1;
|
||||
}
|
||||
|
||||
my $quotre = qr/[^%&\"]/;
|
||||
my $aposre = qr/[^%&\']/;
|
||||
|
||||
sub EntityValue {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data;
|
||||
my $quote = '"';
|
||||
my $re = $quotre;
|
||||
if ($data !~ /^"/) {
|
||||
$data =~ /^'/ or $self->parser_error("Not a quote character", $reader);
|
||||
$quote = "'";
|
||||
$re = $aposre;
|
||||
}
|
||||
$reader->move_along(1);
|
||||
|
||||
my $value = '';
|
||||
|
||||
while (1) {
|
||||
my $data = $reader->data;
|
||||
|
||||
$self->parser_error("EOF found while reading entity value", $reader)
|
||||
unless length($data);
|
||||
|
||||
if ($data =~ /^($re+)/) {
|
||||
my $match = $1;
|
||||
$value .= $match;
|
||||
$reader->move_along(length($match));
|
||||
}
|
||||
elsif ($reader->match('&')) {
|
||||
# if it's a char ref, expand now:
|
||||
if ($reader->match('#')) {
|
||||
my $char;
|
||||
my $ref = '';
|
||||
if ($reader->match('x')) {
|
||||
my $data = $reader->data;
|
||||
while (1) {
|
||||
$self->parser_error("EOF looking for reference end", $reader)
|
||||
unless length($data);
|
||||
if ($data !~ /^([0-9a-fA-F]*)/) {
|
||||
last;
|
||||
}
|
||||
$ref .= $1;
|
||||
$reader->move_along(length($1));
|
||||
if (length($1) == length($data)) {
|
||||
$data = $reader->data;
|
||||
}
|
||||
else {
|
||||
last;
|
||||
}
|
||||
}
|
||||
$char = chr_ref(hex($ref));
|
||||
$ref = "x$ref";
|
||||
}
|
||||
else {
|
||||
my $data = $reader->data;
|
||||
while (1) {
|
||||
$self->parser_error("EOF looking for reference end", $reader)
|
||||
unless length($data);
|
||||
if ($data !~ /^([0-9]*)/) {
|
||||
last;
|
||||
}
|
||||
$ref .= $1;
|
||||
$reader->move_along(length($1));
|
||||
if (length($1) == length($data)) {
|
||||
$data = $reader->data;
|
||||
}
|
||||
else {
|
||||
last;
|
||||
}
|
||||
}
|
||||
$char = chr($ref);
|
||||
}
|
||||
$reader->match(';') ||
|
||||
$self->parser_error("No semi-colon found after character reference", $reader);
|
||||
if ($char !~ $SingleChar) { # match a single character
|
||||
$self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader);
|
||||
}
|
||||
$value .= $char;
|
||||
}
|
||||
else {
|
||||
# entity refs in entities get expanded later, so don't parse now.
|
||||
$value .= '&';
|
||||
}
|
||||
}
|
||||
elsif ($reader->match('%')) {
|
||||
$value .= $self->PEReference($reader);
|
||||
}
|
||||
elsif ($reader->match($quote)) {
|
||||
# end of attrib
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$self->parser_error("Invalid character in attribute value: " . substr($reader->data, 0, 1), $reader);
|
||||
}
|
||||
}
|
||||
|
||||
return $value;
|
||||
}
|
||||
|
||||
sub NDataDecl {
|
||||
my ($self, $reader) = @_;
|
||||
$self->skip_whitespace($reader) || return '';
|
||||
my $data = $reader->data(5);
|
||||
return '' unless $data =~ /^NDATA/;
|
||||
$reader->move_along(5);
|
||||
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader);
|
||||
my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader);
|
||||
return " NDATA $name";
|
||||
}
|
||||
|
||||
sub NotationDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(10);
|
||||
return 0 unless $data =~ /^<!NOTATION/;
|
||||
$reader->move_along(10);
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after NOTATION declaration", $reader);
|
||||
$data = $reader->data;
|
||||
my $value = '';
|
||||
while(1) {
|
||||
$self->parser_error("EOF found while looking for end of NotationDecl", $reader)
|
||||
unless length($data);
|
||||
|
||||
if ($data =~ /^([^>]*)>/) {
|
||||
$value .= $1;
|
||||
$reader->move_along(length($1) + 1);
|
||||
$self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" });
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$value .= $data;
|
||||
$reader->move_along(length($data));
|
||||
$data = $reader->data;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
1;
|
||||
95
database/perl/vendor/lib/XML/SAX/PurePerl/DebugHandler.pm
vendored
Normal file
95
database/perl/vendor/lib/XML/SAX/PurePerl/DebugHandler.pm
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::DebugHandler;
|
||||
|
||||
use strict;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my %opts = @_;
|
||||
return bless \%opts, $class;
|
||||
}
|
||||
|
||||
# DocumentHandler
|
||||
|
||||
sub set_document_locator {
|
||||
my $self = shift;
|
||||
print "set_document_locator\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{set_document_locator}++;
|
||||
}
|
||||
|
||||
sub start_document {
|
||||
my $self = shift;
|
||||
print "start_document\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{start_document}++;
|
||||
}
|
||||
|
||||
sub end_document {
|
||||
my $self = shift;
|
||||
print "end_document\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{end_document}++;
|
||||
}
|
||||
|
||||
sub start_element {
|
||||
my $self = shift;
|
||||
print "start_element\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{start_element}++;
|
||||
}
|
||||
|
||||
sub end_element {
|
||||
my $self = shift;
|
||||
print "end_element\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{end_element}++;
|
||||
}
|
||||
|
||||
sub characters {
|
||||
my $self = shift;
|
||||
print "characters\n" if $ENV{DEBUG_XML};
|
||||
# warn "Char: ", $_[0]->{Data}, "\n";
|
||||
$self->{seen}{characters}++;
|
||||
}
|
||||
|
||||
sub processing_instruction {
|
||||
my $self = shift;
|
||||
print "processing_instruction\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{processing_instruction}++;
|
||||
}
|
||||
|
||||
sub ignorable_whitespace {
|
||||
my $self = shift;
|
||||
print "ignorable_whitespace\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{ignorable_whitespace}++;
|
||||
}
|
||||
|
||||
# LexHandler
|
||||
|
||||
sub comment {
|
||||
my $self = shift;
|
||||
print "comment\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{comment}++;
|
||||
}
|
||||
|
||||
# DTDHandler
|
||||
|
||||
sub notation_decl {
|
||||
my $self = shift;
|
||||
print "notation_decl\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{notation_decl}++;
|
||||
}
|
||||
|
||||
sub unparsed_entity_decl {
|
||||
my $self = shift;
|
||||
print "unparsed_entity_decl\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{entity_decl}++;
|
||||
}
|
||||
|
||||
# EntityResolver
|
||||
|
||||
sub resolve_entity {
|
||||
my $self = shift;
|
||||
print "resolve_entity\n" if $ENV{DEBUG_XML};
|
||||
$self->{seen}{resolve_entity}++;
|
||||
return '';
|
||||
}
|
||||
|
||||
1;
|
||||
180
database/perl/vendor/lib/XML/SAX/PurePerl/DocType.pm
vendored
Normal file
180
database/perl/vendor/lib/XML/SAX/PurePerl/DocType.pm
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
|
||||
use strict;
|
||||
use XML::SAX::PurePerl::Productions qw($PubidChar);
|
||||
|
||||
sub doctypedecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(9);
|
||||
if ($data =~ /^<!DOCTYPE/) {
|
||||
$reader->move_along(9);
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after doctype declaration", $reader);
|
||||
|
||||
my $root_name = $self->Name($reader) ||
|
||||
$self->parser_error("Doctype declaration has no root element name", $reader);
|
||||
|
||||
if ($self->skip_whitespace($reader)) {
|
||||
# might be externalid...
|
||||
my %dtd = $self->ExternalID($reader);
|
||||
# TODO: Call SAX event
|
||||
}
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$self->InternalSubset($reader);
|
||||
|
||||
$reader->match('>') or $self->parser_error("Doctype not closed", $reader);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub ExternalID {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(6);
|
||||
|
||||
if ($data =~ /^SYSTEM/) {
|
||||
$reader->move_along(6);
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after SYSTEM identifier", $reader);
|
||||
return (SYSTEM => $self->SystemLiteral($reader));
|
||||
}
|
||||
elsif ($data =~ /^PUBLIC/) {
|
||||
$reader->move_along(6);
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("No whitespace after PUBLIC identifier", $reader);
|
||||
|
||||
my $quote = $self->quote($reader) ||
|
||||
$self->parser_error("Not a quote character in PUBLIC identifier", $reader);
|
||||
|
||||
my $data = $reader->data;
|
||||
my $pubid = '';
|
||||
while(1) {
|
||||
$self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader)
|
||||
unless length($data);
|
||||
|
||||
if ($data =~ /^([^$quote]*)$quote/) {
|
||||
$pubid .= $1;
|
||||
$reader->move_along(length($1) + 1);
|
||||
last;
|
||||
}
|
||||
else {
|
||||
$pubid .= $data;
|
||||
$reader->move_along(length($data));
|
||||
$data = $reader->data;
|
||||
}
|
||||
}
|
||||
|
||||
if ($pubid !~ /^($PubidChar)+$/) {
|
||||
$self->parser_error("Invalid characters in PUBLIC identifier", $reader);
|
||||
}
|
||||
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader);
|
||||
|
||||
return (PUBLIC => $pubid,
|
||||
SYSTEM => $self->SystemLiteral($reader));
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub SystemLiteral {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $quote = $self->quote($reader);
|
||||
|
||||
my $data = $reader->data;
|
||||
my $systemid = '';
|
||||
while (1) {
|
||||
$self->parser_error("EOF found while looking for end of System Literal", $reader)
|
||||
unless length($data);
|
||||
if ($data =~ /^([^$quote]*)$quote/) {
|
||||
$systemid .= $1;
|
||||
$reader->move_along(length($1) + 1);
|
||||
return $systemid;
|
||||
}
|
||||
else {
|
||||
$systemid .= $data;
|
||||
$reader->move_along(length($data));
|
||||
$data = $reader->data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub InternalSubset {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return 0 unless $reader->match('[');
|
||||
|
||||
1 while $self->IntSubsetDecl($reader);
|
||||
|
||||
$reader->match(']') or $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader);
|
||||
$self->skip_whitespace($reader);
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub IntSubsetDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return $self->DeclSep($reader) || $self->markupdecl($reader);
|
||||
}
|
||||
|
||||
sub DeclSep {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
if ($self->skip_whitespace($reader)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ($self->PEReference($reader)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
# if ($self->ParsedExtSubset($reader)) {
|
||||
# return 1;
|
||||
# }
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub PEReference {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
return 0 unless $reader->match('%');
|
||||
|
||||
my $peref = $self->Name($reader) ||
|
||||
$self->parser_error("PEReference did not find a Name", $reader);
|
||||
# TODO - load/parse the peref
|
||||
|
||||
$reader->match(';') or $self->parser_error("Invalid token in PEReference", $reader);
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub markupdecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
if ($self->elementdecl($reader) ||
|
||||
$self->AttlistDecl($reader) ||
|
||||
$self->EntityDecl($reader) ||
|
||||
$self->NotationDecl($reader) ||
|
||||
$self->PI($reader) ||
|
||||
$self->Comment($reader))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
1;
|
||||
105
database/perl/vendor/lib/XML/SAX/PurePerl/EncodingDetect.pm
vendored
Normal file
105
database/perl/vendor/lib/XML/SAX/PurePerl/EncodingDetect.pm
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl; # NB, not ::EncodingDetect!
|
||||
|
||||
use strict;
|
||||
|
||||
sub encoding_detect {
|
||||
my ($parser, $reader) = @_;
|
||||
|
||||
my $error = "Invalid byte sequence at start of file";
|
||||
|
||||
my $data = $reader->data;
|
||||
if ($data =~ /^\x00\x00\xFE\xFF/) {
|
||||
# BO-UCS4-be
|
||||
$reader->move_along(4);
|
||||
$reader->set_encoding('UCS-4BE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x00\x00\xFF\xFE/) {
|
||||
# BO-UCS-4-2143
|
||||
$reader->move_along(4);
|
||||
$reader->set_encoding('UCS-4-2143');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x00\x00\x00\x3C/) {
|
||||
$reader->set_encoding('UCS-4BE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x00\x00\x3C\x00/) {
|
||||
$reader->set_encoding('UCS-4-2143');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x00\x3C\x00\x00/) {
|
||||
$reader->set_encoding('UCS-4-3412');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x00\x3C\x00\x3F/) {
|
||||
$reader->set_encoding('UTF-16BE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\xFF\xFE\x00\x00/) {
|
||||
# BO-UCS-4LE
|
||||
$reader->move_along(4);
|
||||
$reader->set_encoding('UCS-4LE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\xFF\xFE/) {
|
||||
$reader->move_along(2);
|
||||
$reader->set_encoding('UTF-16LE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\xFE\xFF\x00\x00/) {
|
||||
$reader->move_along(4);
|
||||
$reader->set_encoding('UCS-4-3412');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\xFE\xFF/) {
|
||||
$reader->move_along(2);
|
||||
$reader->set_encoding('UTF-16BE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\xEF\xBB\xBF/) { # UTF-8 BOM
|
||||
$reader->move_along(3);
|
||||
$reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C\x00\x00\x00/) {
|
||||
$reader->set_encoding('UCS-4LE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C\x00\x3F\x00/) {
|
||||
$reader->set_encoding('UTF-16LE');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C\x3F\x78\x6D/) {
|
||||
# $reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C\x3F\x78/) {
|
||||
# $reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C\x3F/) {
|
||||
# $reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x3C/) {
|
||||
# $reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^[\x20\x09\x0A\x0D]+\x3C[^\x3F]/) {
|
||||
# $reader->set_encoding('UTF-8');
|
||||
return;
|
||||
}
|
||||
elsif ($data =~ /^\x4C\x6F\xA7\x94/) {
|
||||
$reader->set_encoding('EBCDIC');
|
||||
return;
|
||||
}
|
||||
|
||||
warn("Unable to recognise encoding of this document");
|
||||
return;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
67
database/perl/vendor/lib/XML/SAX/PurePerl/Exception.pm
vendored
Normal file
67
database/perl/vendor/lib/XML/SAX/PurePerl/Exception.pm
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Exception;
|
||||
|
||||
use strict;
|
||||
|
||||
use overload '""' => "stringify";
|
||||
|
||||
use vars qw/$StackTrace/;
|
||||
|
||||
$StackTrace = $ENV{XML_DEBUG} || 0;
|
||||
|
||||
sub throw {
|
||||
my $class = shift;
|
||||
die $class->new(@_);
|
||||
}
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my %opts = @_;
|
||||
die "Invalid options" unless exists $opts{Message};
|
||||
|
||||
if ($opts{reader}) {
|
||||
return bless { Message => $opts{Message},
|
||||
Exception => undef, # not sure what this is for!!!
|
||||
ColumnNumber => $opts{reader}->column,
|
||||
LineNumber => $opts{reader}->line,
|
||||
PublicId => $opts{reader}->public_id,
|
||||
SystemId => $opts{reader}->system_id,
|
||||
$StackTrace ? (StackTrace => stacktrace()) : (),
|
||||
}, $class;
|
||||
}
|
||||
return bless { Message => $opts{Message},
|
||||
Exception => undef, # not sure what this is for!!!
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub stringify {
|
||||
my $self = shift;
|
||||
local $^W;
|
||||
return $self->{Message} . " [Ln: " . $self->{LineNumber} .
|
||||
", Col: " . $self->{ColumnNumber} . "]" .
|
||||
($StackTrace ? stackstring($self->{StackTrace}) : "") . "\n";
|
||||
}
|
||||
|
||||
sub stacktrace {
|
||||
my $i = 2;
|
||||
my @fulltrace;
|
||||
while (my @trace = caller($i++)) {
|
||||
my %hash;
|
||||
@hash{qw(Package Filename Line)} = @trace[0..2];
|
||||
push @fulltrace, \%hash;
|
||||
}
|
||||
return \@fulltrace;
|
||||
}
|
||||
|
||||
sub stackstring {
|
||||
my $stacktrace = shift;
|
||||
my $string = "\nFrom:\n";
|
||||
foreach my $current (@$stacktrace) {
|
||||
$string .= $current->{Filename} . " Line: " . $current->{Line} . "\n";
|
||||
}
|
||||
return $string;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
28
database/perl/vendor/lib/XML/SAX/PurePerl/NoUnicodeExt.pm
vendored
Normal file
28
database/perl/vendor/lib/XML/SAX/PurePerl/NoUnicodeExt.pm
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
use strict;
|
||||
|
||||
sub chr_ref {
|
||||
my $n = shift;
|
||||
if ($n < 0x80) {
|
||||
return chr ($n);
|
||||
}
|
||||
elsif ($n < 0x800) {
|
||||
return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80));
|
||||
}
|
||||
elsif ($n < 0x10000) {
|
||||
return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80),
|
||||
(($n & 0x3f) | 0x80));
|
||||
}
|
||||
elsif ($n < 0x110000)
|
||||
{
|
||||
return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80),
|
||||
((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80));
|
||||
}
|
||||
else {
|
||||
return undef;
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
147
database/perl/vendor/lib/XML/SAX/PurePerl/Productions.pm
vendored
Normal file
147
database/perl/vendor/lib/XML/SAX/PurePerl/Productions.pm
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Productions;
|
||||
|
||||
use Exporter;
|
||||
@ISA = ('Exporter');
|
||||
@EXPORT_OK = qw($S $Char $VersionNum $BaseChar $Ideographic
|
||||
$Extender $Digit $CombiningChar $EncNameStart $EncNameEnd $NameChar $CharMinusDash
|
||||
$PubidChar $Any $SingleChar);
|
||||
|
||||
### WARNING!!! All productions here must *only* match a *single* character!!! ###
|
||||
|
||||
BEGIN {
|
||||
$S = qr/[\x20\x09\x0D\x0A]/;
|
||||
|
||||
$CharMinusDash = qr/[^-]/x;
|
||||
|
||||
$Any = qr/ . /xms;
|
||||
|
||||
$VersionNum = qr/ [a-zA-Z0-9_.:-]+ /x;
|
||||
|
||||
$EncNameStart = qr/ [A-Za-z] /x;
|
||||
$EncNameEnd = qr/ [A-Za-z0-9\._-] /x;
|
||||
|
||||
$PubidChar = qr/ [\x20\x0D\x0Aa-zA-Z0-9'()\+,.\/:=\?;!*\#@\$_\%-] /x;
|
||||
|
||||
if ($] < 5.006) {
|
||||
eval <<' PERL';
|
||||
$Char = qr/^ [\x09\x0A\x0D\x20-\x7F]|([\xC0-\xFD][\x80-\xBF]+) $/x;
|
||||
|
||||
$SingleChar = qr/^$Char$/;
|
||||
|
||||
$BaseChar = qr/ [\x41-\x5A\x61-\x7A]|([\xC0-\xFD][\x80-\xBF]+) /x;
|
||||
|
||||
$Extender = qr/ \xB7 /x;
|
||||
|
||||
$Digit = qr/ [\x30-\x39] /x;
|
||||
|
||||
# can't do this one without unicode
|
||||
# $CombiningChar = qr/^$/msx;
|
||||
|
||||
$NameChar = qr/^ (?: $BaseChar | $Digit | [._:-] | $Extender )+ $/x;
|
||||
PERL
|
||||
die $@ if $@;
|
||||
}
|
||||
else {
|
||||
eval <<' PERL';
|
||||
|
||||
use utf8; # for 5.6
|
||||
|
||||
$Char = qr/^ [\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}] $/x;
|
||||
|
||||
$SingleChar = qr/^$Char$/;
|
||||
|
||||
$BaseChar = qr/
|
||||
[\x{0041}-\x{005A}\x{0061}-\x{007A}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}] |
|
||||
[\x{00F8}-\x{00FF}\x{0100}-\x{0131}\x{0134}-\x{013E}\x{0141}-\x{0148}] |
|
||||
[\x{014A}-\x{017E}\x{0180}-\x{01C3}\x{01CD}-\x{01F0}\x{01F4}-\x{01F5}] |
|
||||
[\x{01FA}-\x{0217}\x{0250}-\x{02A8}\x{02BB}-\x{02C1}\x{0386}\x{0388}-\x{038A}] |
|
||||
[\x{038C}\x{038E}-\x{03A1}\x{03A3}-\x{03CE}\x{03D0}-\x{03D6}\x{03DA}] |
|
||||
[\x{03DC}\x{03DE}\x{03E0}\x{03E2}-\x{03F3}\x{0401}-\x{040C}\x{040E}-\x{044F}] |
|
||||
[\x{0451}-\x{045C}\x{045E}-\x{0481}\x{0490}-\x{04C4}\x{04C7}-\x{04C8}] |
|
||||
[\x{04CB}-\x{04CC}\x{04D0}-\x{04EB}\x{04EE}-\x{04F5}\x{04F8}-\x{04F9}] |
|
||||
[\x{0531}-\x{0556}\x{0559}\x{0561}-\x{0586}\x{05D0}-\x{05EA}\x{05F0}-\x{05F2}] |
|
||||
[\x{0621}-\x{063A}\x{0641}-\x{064A}\x{0671}-\x{06B7}\x{06BA}-\x{06BE}] |
|
||||
[\x{06C0}-\x{06CE}\x{06D0}-\x{06D3}\x{06D5}\x{06E5}-\x{06E6}\x{0905}-\x{0939}] |
|
||||
[\x{093D}\x{0958}-\x{0961}\x{0985}-\x{098C}\x{098F}-\x{0990}] |
|
||||
[\x{0993}-\x{09A8}\x{09AA}-\x{09B0}\x{09B2}\x{09B6}-\x{09B9}\x{09DC}-\x{09DD}] |
|
||||
[\x{09DF}-\x{09E1}\x{09F0}-\x{09F1}\x{0A05}-\x{0A0A}\x{0A0F}-\x{0A10}] |
|
||||
[\x{0A13}-\x{0A28}\x{0A2A}-\x{0A30}\x{0A32}-\x{0A33}\x{0A35}-\x{0A36}] |
|
||||
[\x{0A38}-\x{0A39}\x{0A59}-\x{0A5C}\x{0A5E}\x{0A72}-\x{0A74}\x{0A85}-\x{0A8B}] |
|
||||
[\x{0A8D}\x{0A8F}-\x{0A91}\x{0A93}-\x{0AA8}\x{0AAA}-\x{0AB0}] |
|
||||
[\x{0AB2}-\x{0AB3}\x{0AB5}-\x{0AB9}\x{0ABD}\x{0AE0}\x{0B05}-\x{0B0C}] |
|
||||
[\x{0B0F}-\x{0B10}\x{0B13}-\x{0B28}\x{0B2A}-\x{0B30}\x{0B32}-\x{0B33}] |
|
||||
[\x{0B36}-\x{0B39}\x{0B3D}\x{0B5C}-\x{0B5D}\x{0B5F}-\x{0B61}\x{0B85}-\x{0B8A}] |
|
||||
[\x{0B8E}-\x{0B90}\x{0B92}-\x{0B95}\x{0B99}-\x{0B9A}\x{0B9C}] |
|
||||
[\x{0B9E}-\x{0B9F}\x{0BA3}-\x{0BA4}\x{0BA8}-\x{0BAA}\x{0BAE}-\x{0BB5}] |
|
||||
[\x{0BB7}-\x{0BB9}\x{0C05}-\x{0C0C}\x{0C0E}-\x{0C10}\x{0C12}-\x{0C28}] |
|
||||
[\x{0C2A}-\x{0C33}\x{0C35}-\x{0C39}\x{0C60}-\x{0C61}\x{0C85}-\x{0C8C}] |
|
||||
[\x{0C8E}-\x{0C90}\x{0C92}-\x{0CA8}\x{0CAA}-\x{0CB3}\x{0CB5}-\x{0CB9}\x{0CDE}] |
|
||||
[\x{0CE0}-\x{0CE1}\x{0D05}-\x{0D0C}\x{0D0E}-\x{0D10}\x{0D12}-\x{0D28}] |
|
||||
[\x{0D2A}-\x{0D39}\x{0D60}-\x{0D61}\x{0E01}-\x{0E2E}\x{0E30}\x{0E32}-\x{0E33}] |
|
||||
[\x{0E40}-\x{0E45}\x{0E81}-\x{0E82}\x{0E84}\x{0E87}-\x{0E88}\x{0E8A}] |
|
||||
[\x{0E8D}\x{0E94}-\x{0E97}\x{0E99}-\x{0E9F}\x{0EA1}-\x{0EA3}\x{0EA5}\x{0EA7}] |
|
||||
[\x{0EAA}-\x{0EAB}\x{0EAD}-\x{0EAE}\x{0EB0}\x{0EB2}-\x{0EB3}\x{0EBD}] |
|
||||
[\x{0EC0}-\x{0EC4}\x{0F40}-\x{0F47}\x{0F49}-\x{0F69}\x{10A0}-\x{10C5}] |
|
||||
[\x{10D0}-\x{10F6}\x{1100}\x{1102}-\x{1103}\x{1105}-\x{1107}\x{1109}] |
|
||||
[\x{110B}-\x{110C}\x{110E}-\x{1112}\x{113C}\x{113E}\x{1140}\x{114C}\x{114E}] |
|
||||
[\x{1150}\x{1154}-\x{1155}\x{1159}\x{115F}-\x{1161}\x{1163}\x{1165}] |
|
||||
[\x{1167}\x{1169}\x{116D}-\x{116E}\x{1172}-\x{1173}\x{1175}\x{119E}\x{11A8}] |
|
||||
[\x{11AB}\x{11AE}-\x{11AF}\x{11B7}-\x{11B8}\x{11BA}\x{11BC}-\x{11C2}] |
|
||||
[\x{11EB}\x{11F0}\x{11F9}\x{1E00}-\x{1E9B}\x{1EA0}-\x{1EF9}\x{1F00}-\x{1F15}] |
|
||||
[\x{1F18}-\x{1F1D}\x{1F20}-\x{1F45}\x{1F48}-\x{1F4D}\x{1F50}-\x{1F57}] |
|
||||
[\x{1F59}\x{1F5B}\x{1F5D}\x{1F5F}-\x{1F7D}\x{1F80}-\x{1FB4}\x{1FB6}-\x{1FBC}] |
|
||||
[\x{1FBE}\x{1FC2}-\x{1FC4}\x{1FC6}-\x{1FCC}\x{1FD0}-\x{1FD3}] |
|
||||
[\x{1FD6}-\x{1FDB}\x{1FE0}-\x{1FEC}\x{1FF2}-\x{1FF4}\x{1FF6}-\x{1FFC}] |
|
||||
[\x{2126}\x{212A}-\x{212B}\x{212E}\x{2180}-\x{2182}\x{3041}-\x{3094}] |
|
||||
[\x{30A1}-\x{30FA}\x{3105}-\x{312C}\x{AC00}-\x{D7A3}]
|
||||
/x;
|
||||
|
||||
$Extender = qr/
|
||||
[\x{00B7}\x{02D0}\x{02D1}\x{0387}\x{0640}\x{0E46}\x{0EC6}\x{3005}\x{3031}-\x{3035}\x{309D}-\x{309E}\x{30FC}-\x{30FE}]
|
||||
/x;
|
||||
|
||||
$Digit = qr/
|
||||
[\x{0030}-\x{0039}\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{0966}-\x{096F}] |
|
||||
[\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}] |
|
||||
[\x{0BE7}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}] |
|
||||
[\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}]
|
||||
/x;
|
||||
|
||||
$CombiningChar = qr/
|
||||
[\x{0300}-\x{0345}\x{0360}-\x{0361}\x{0483}-\x{0486}\x{0591}-\x{05A1}] |
|
||||
[\x{05A3}-\x{05B9}\x{05BB}-\x{05BD}\x{05BF}\x{05C1}-\x{05C2}\x{05C4}] |
|
||||
[\x{064B}-\x{0652}\x{0670}\x{06D6}-\x{06DC}\x{06DD}-\x{06DF}\x{06E0}-\x{06E4}] |
|
||||
[\x{06E7}-\x{06E8}\x{06EA}-\x{06ED}\x{0901}-\x{0903}\x{093C}] |
|
||||
[\x{093E}-\x{094C}\x{094D}\x{0951}-\x{0954}\x{0962}-\x{0963}\x{0981}-\x{0983}] |
|
||||
[\x{09BC}\x{09BE}\x{09BF}\x{09C0}-\x{09C4}\x{09C7}-\x{09C8}] |
|
||||
[\x{09CB}-\x{09CD}\x{09D7}\x{09E2}-\x{09E3}\x{0A02}\x{0A3C}\x{0A3E}\x{0A3F}] |
|
||||
[\x{0A40}-\x{0A42}\x{0A47}-\x{0A48}\x{0A4B}-\x{0A4D}\x{0A70}-\x{0A71}] |
|
||||
[\x{0A81}-\x{0A83}\x{0ABC}\x{0ABE}-\x{0AC5}\x{0AC7}-\x{0AC9}\x{0ACB}-\x{0ACD}] |
|
||||
[\x{0B01}-\x{0B03}\x{0B3C}\x{0B3E}-\x{0B43}\x{0B47}-\x{0B48}] |
|
||||
[\x{0B4B}-\x{0B4D}\x{0B56}-\x{0B57}\x{0B82}-\x{0B83}\x{0BBE}-\x{0BC2}] |
|
||||
[\x{0BC6}-\x{0BC8}\x{0BCA}-\x{0BCD}\x{0BD7}\x{0C01}-\x{0C03}\x{0C3E}-\x{0C44}] |
|
||||
[\x{0C46}-\x{0C48}\x{0C4A}-\x{0C4D}\x{0C55}-\x{0C56}\x{0C82}-\x{0C83}] |
|
||||
[\x{0CBE}-\x{0CC4}\x{0CC6}-\x{0CC8}\x{0CCA}-\x{0CCD}\x{0CD5}-\x{0CD6}] |
|
||||
[\x{0D02}-\x{0D03}\x{0D3E}-\x{0D43}\x{0D46}-\x{0D48}\x{0D4A}-\x{0D4D}\x{0D57}] |
|
||||
[\x{0E31}\x{0E34}-\x{0E3A}\x{0E47}-\x{0E4E}\x{0EB1}\x{0EB4}-\x{0EB9}] |
|
||||
[\x{0EBB}-\x{0EBC}\x{0EC8}-\x{0ECD}\x{0F18}-\x{0F19}\x{0F35}\x{0F37}\x{0F39}] |
|
||||
[\x{0F3E}\x{0F3F}\x{0F71}-\x{0F84}\x{0F86}-\x{0F8B}\x{0F90}-\x{0F95}] |
|
||||
[\x{0F97}\x{0F99}-\x{0FAD}\x{0FB1}-\x{0FB7}\x{0FB9}\x{20D0}-\x{20DC}\x{20E1}] |
|
||||
[\x{302A}-\x{302F}\x{3099}\x{309A}]
|
||||
/x;
|
||||
|
||||
$Ideographic = qr/
|
||||
[\x{4E00}-\x{9FA5}\x{3007}\x{3021}-\x{3029}]
|
||||
/x;
|
||||
|
||||
$NameChar = qr/^ (?: $BaseChar | $Ideographic | $Digit | [._:-] | $CombiningChar | $Extender )+ $/x;
|
||||
PERL
|
||||
|
||||
die $@ if $@;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
1;
|
||||
136
database/perl/vendor/lib/XML/SAX/PurePerl/Reader.pm
vendored
Normal file
136
database/perl/vendor/lib/XML/SAX/PurePerl/Reader.pm
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader;
|
||||
|
||||
use strict;
|
||||
use XML::SAX::PurePerl::Reader::URI;
|
||||
use Exporter ();
|
||||
|
||||
use vars qw(@ISA @EXPORT_OK);
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT_OK = qw(
|
||||
EOF
|
||||
BUFFER
|
||||
LINE
|
||||
COLUMN
|
||||
ENCODING
|
||||
XML_VERSION
|
||||
);
|
||||
|
||||
use constant EOF => 0;
|
||||
use constant BUFFER => 1;
|
||||
use constant LINE => 2;
|
||||
use constant COLUMN => 3;
|
||||
use constant ENCODING => 4;
|
||||
use constant SYSTEM_ID => 5;
|
||||
use constant PUBLIC_ID => 6;
|
||||
use constant XML_VERSION => 7;
|
||||
|
||||
require XML::SAX::PurePerl::Reader::Stream;
|
||||
require XML::SAX::PurePerl::Reader::String;
|
||||
|
||||
if ($] >= 5.007002) {
|
||||
require XML::SAX::PurePerl::Reader::UnicodeExt;
|
||||
}
|
||||
else {
|
||||
require XML::SAX::PurePerl::Reader::NoUnicodeExt;
|
||||
}
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $thing = shift;
|
||||
|
||||
# try to figure if this $thing is a handle of some sort
|
||||
if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) {
|
||||
return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
|
||||
}
|
||||
my $ioref;
|
||||
if (tied($thing)) {
|
||||
my $class = ref($thing);
|
||||
no strict 'refs';
|
||||
$ioref = $thing if defined &{"${class}::TIEHANDLE"};
|
||||
}
|
||||
else {
|
||||
eval {
|
||||
$ioref = *{$thing}{IO};
|
||||
};
|
||||
undef $@;
|
||||
}
|
||||
if ($ioref) {
|
||||
return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
|
||||
}
|
||||
|
||||
if ($thing =~ /</) {
|
||||
# assume it's a string
|
||||
return XML::SAX::PurePerl::Reader::String->new($thing)->init;
|
||||
}
|
||||
|
||||
# assume it is a uri
|
||||
return XML::SAX::PurePerl::Reader::URI->new($thing)->init;
|
||||
}
|
||||
|
||||
sub init {
|
||||
my $self = shift;
|
||||
$self->[LINE] = 1;
|
||||
$self->[COLUMN] = 1;
|
||||
$self->read_more;
|
||||
return $self;
|
||||
}
|
||||
|
||||
sub data {
|
||||
my ($self, $min_length) = (@_, 1);
|
||||
if (length($self->[BUFFER]) < $min_length) {
|
||||
$self->read_more;
|
||||
}
|
||||
return $self->[BUFFER];
|
||||
}
|
||||
|
||||
sub match {
|
||||
my ($self, $char) = @_;
|
||||
my $data = $self->data;
|
||||
if (substr($data, 0, 1) eq $char) {
|
||||
$self->move_along(1);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub public_id {
|
||||
my $self = shift;
|
||||
@_ and $self->[PUBLIC_ID] = shift;
|
||||
$self->[PUBLIC_ID];
|
||||
}
|
||||
|
||||
sub system_id {
|
||||
my $self = shift;
|
||||
@_ and $self->[SYSTEM_ID] = shift;
|
||||
$self->[SYSTEM_ID];
|
||||
}
|
||||
|
||||
sub line {
|
||||
shift->[LINE];
|
||||
}
|
||||
|
||||
sub column {
|
||||
shift->[COLUMN];
|
||||
}
|
||||
|
||||
sub get_encoding {
|
||||
my $self = shift;
|
||||
return $self->[ENCODING];
|
||||
}
|
||||
|
||||
sub get_xml_version {
|
||||
my $self = shift;
|
||||
return $self->[XML_VERSION];
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
XML::Parser::PurePerl::Reader - Abstract Reader factory class
|
||||
|
||||
=cut
|
||||
25
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/NoUnicodeExt.pm
vendored
Normal file
25
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/NoUnicodeExt.pm
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader;
|
||||
use strict;
|
||||
|
||||
sub set_raw_stream {
|
||||
# no-op
|
||||
}
|
||||
|
||||
sub switch_encoding_stream {
|
||||
my ($fh, $encoding) = @_;
|
||||
throw XML::SAX::Exception::Parse (
|
||||
Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
|
||||
) if $encoding !~ /(ASCII|UTF\-?8)/i;
|
||||
}
|
||||
|
||||
sub switch_encoding_string {
|
||||
my (undef, $encoding) = @_;
|
||||
throw XML::SAX::Exception::Parse (
|
||||
Message => "Only ASCII encoding allowed without perl 5.7.2 or higher. You tried: $encoding",
|
||||
) if $encoding !~ /(ASCII|UTF\-?8)/i;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
84
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/Stream.pm
vendored
Normal file
84
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/Stream.pm
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader::Stream;
|
||||
|
||||
use strict;
|
||||
use vars qw(@ISA);
|
||||
|
||||
use XML::SAX::PurePerl::Reader qw(
|
||||
EOF
|
||||
BUFFER
|
||||
LINE
|
||||
COLUMN
|
||||
ENCODING
|
||||
XML_VERSION
|
||||
);
|
||||
use XML::SAX::Exception;
|
||||
|
||||
@ISA = ('XML::SAX::PurePerl::Reader');
|
||||
|
||||
# subclassed by adding 1 to last element
|
||||
use constant FH => 8;
|
||||
use constant BUFFER_SIZE => 4096;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $ioref = shift;
|
||||
XML::SAX::PurePerl::Reader::set_raw_stream($ioref);
|
||||
my @parts;
|
||||
@parts[FH, LINE, COLUMN, BUFFER, EOF, XML_VERSION] =
|
||||
($ioref, 1, 0, '', 0, '1.0');
|
||||
return bless \@parts, $class;
|
||||
}
|
||||
|
||||
sub read_more {
|
||||
my $self = shift;
|
||||
my $buf;
|
||||
my $bytesread = read($self->[FH], $buf, BUFFER_SIZE);
|
||||
if ($bytesread) {
|
||||
$self->[BUFFER] .= $buf;
|
||||
return 1;
|
||||
}
|
||||
elsif (defined($bytesread)) {
|
||||
$self->[EOF]++;
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
throw XML::SAX::Exception::Parse(
|
||||
Message => "Error reading from filehandle: $!",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
sub move_along {
|
||||
my $self = shift;
|
||||
my $discarded = substr($self->[BUFFER], 0, $_[0], '');
|
||||
|
||||
# Wish I could skip this lot - tells us where we are in the file
|
||||
my $lines = $discarded =~ tr/\n//;
|
||||
$self->[LINE] += $lines;
|
||||
if ($lines) {
|
||||
$discarded =~ /\n([^\n]*)$/;
|
||||
$self->[COLUMN] = length($1);
|
||||
}
|
||||
else {
|
||||
$self->[COLUMN] += $_[0];
|
||||
}
|
||||
}
|
||||
|
||||
sub set_encoding {
|
||||
my $self = shift;
|
||||
my ($encoding) = @_;
|
||||
# warn("set encoding to: $encoding\n");
|
||||
XML::SAX::PurePerl::Reader::switch_encoding_stream($self->[FH], $encoding);
|
||||
XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding);
|
||||
$self->[ENCODING] = $encoding;
|
||||
}
|
||||
|
||||
sub bytepos {
|
||||
my $self = shift;
|
||||
tell($self->[FH]);
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
78
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/String.pm
vendored
Normal file
78
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/String.pm
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader::String;
|
||||
|
||||
use strict;
|
||||
use vars qw(@ISA);
|
||||
|
||||
use XML::SAX::PurePerl::Reader qw(
|
||||
LINE
|
||||
COLUMN
|
||||
BUFFER
|
||||
ENCODING
|
||||
EOF
|
||||
);
|
||||
|
||||
@ISA = ('XML::SAX::PurePerl::Reader');
|
||||
|
||||
use constant DISCARDED => 8;
|
||||
use constant STRING => 9;
|
||||
use constant USED => 10;
|
||||
use constant CHUNK_SIZE => 2048;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $string = shift;
|
||||
my @parts;
|
||||
@parts[BUFFER, EOF, LINE, COLUMN, DISCARDED, STRING, USED] =
|
||||
('', 0, 1, 0, 0, $string, 0);
|
||||
return bless \@parts, $class;
|
||||
}
|
||||
|
||||
sub read_more () {
|
||||
my $self = shift;
|
||||
if ($self->[USED] >= length($self->[STRING])) {
|
||||
$self->[EOF]++;
|
||||
return 0;
|
||||
}
|
||||
my $bytes = CHUNK_SIZE;
|
||||
if ($bytes > (length($self->[STRING]) - $self->[USED])) {
|
||||
$bytes = (length($self->[STRING]) - $self->[USED]);
|
||||
}
|
||||
$self->[BUFFER] .= substr($self->[STRING], $self->[USED], $bytes);
|
||||
$self->[USED] += $bytes;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
sub move_along {
|
||||
my($self, $bytes) = @_;
|
||||
my $discarded = substr($self->[BUFFER], 0, $bytes, '');
|
||||
$self->[DISCARDED] += length($discarded);
|
||||
|
||||
# Wish I could skip this lot - tells us where we are in the file
|
||||
my $lines = $discarded =~ tr/\n//;
|
||||
$self->[LINE] += $lines;
|
||||
if ($lines) {
|
||||
$discarded =~ /\n([^\n]*)$/;
|
||||
$self->[COLUMN] = length($1);
|
||||
}
|
||||
else {
|
||||
$self->[COLUMN] += $_[0];
|
||||
}
|
||||
}
|
||||
|
||||
sub set_encoding {
|
||||
my $self = shift;
|
||||
my ($encoding) = @_;
|
||||
|
||||
XML::SAX::PurePerl::Reader::switch_encoding_string($self->[BUFFER], $encoding, "utf-8");
|
||||
$self->[ENCODING] = $encoding;
|
||||
}
|
||||
|
||||
sub bytepos {
|
||||
my $self = shift;
|
||||
$self->[DISCARDED];
|
||||
}
|
||||
|
||||
1;
|
||||
57
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/URI.pm
vendored
Normal file
57
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/URI.pm
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader::URI;
|
||||
|
||||
use strict;
|
||||
|
||||
use XML::SAX::PurePerl::Reader;
|
||||
use File::Temp qw(tempfile);
|
||||
use Symbol;
|
||||
|
||||
## NOTE: This is *not* a subclass of Reader. It just returns Stream or String
|
||||
## Reader objects depending on what it's capabilities are.
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $uri = shift;
|
||||
# request the URI
|
||||
if (-e $uri && -f _) {
|
||||
my $fh = gensym;
|
||||
open($fh, $uri) || die "Cannot open file $uri : $!";
|
||||
return XML::SAX::PurePerl::Reader::Stream->new($fh);
|
||||
}
|
||||
elsif ($uri =~ /^file:(.*)$/ && -e $1 && -f _) {
|
||||
my $file = $1;
|
||||
my $fh = gensym;
|
||||
open($fh, $file) || die "Cannot open file $file : $!";
|
||||
return XML::SAX::PurePerl::Reader::Stream->new($fh);
|
||||
}
|
||||
else {
|
||||
# request URI, return String reader
|
||||
require LWP::UserAgent;
|
||||
my $ua = LWP::UserAgent->new;
|
||||
$ua->agent("Perl/XML/SAX/PurePerl/1.0 " . $ua->agent);
|
||||
|
||||
my $req = HTTP::Request->new(GET => $uri);
|
||||
|
||||
my $fh = tempfile();
|
||||
|
||||
my $callback = sub {
|
||||
my ($data, $response, $protocol) = @_;
|
||||
print $fh $data;
|
||||
};
|
||||
|
||||
my $res = $ua->request($req, $callback, 4096);
|
||||
|
||||
if ($res->is_success) {
|
||||
seek($fh, 0, 0);
|
||||
return XML::SAX::PurePerl::Reader::Stream->new($fh);
|
||||
}
|
||||
else {
|
||||
die "LWP Request Failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
1;
|
||||
23
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/UnicodeExt.pm
vendored
Normal file
23
database/perl/vendor/lib/XML/SAX/PurePerl/Reader/UnicodeExt.pm
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl::Reader;
|
||||
use strict;
|
||||
|
||||
use Encode ();
|
||||
|
||||
sub set_raw_stream {
|
||||
my ($fh) = @_;
|
||||
binmode($fh, ":bytes");
|
||||
}
|
||||
|
||||
sub switch_encoding_stream {
|
||||
my ($fh, $encoding) = @_;
|
||||
binmode($fh, ":encoding($encoding)");
|
||||
}
|
||||
|
||||
sub switch_encoding_string {
|
||||
$_[0] = Encode::decode($_[1], $_[0]);
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
22
database/perl/vendor/lib/XML/SAX/PurePerl/UnicodeExt.pm
vendored
Normal file
22
database/perl/vendor/lib/XML/SAX/PurePerl/UnicodeExt.pm
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
use strict;
|
||||
|
||||
no warnings 'utf8';
|
||||
|
||||
sub chr_ref {
|
||||
return chr(shift);
|
||||
}
|
||||
|
||||
if ($] >= 5.007002) {
|
||||
require Encode;
|
||||
|
||||
Encode::define_alias( "UTF-16" => "UCS-2" );
|
||||
Encode::define_alias( "UTF-16BE" => "UCS-2" );
|
||||
Encode::define_alias( "UTF-16LE" => "ucs-2le" );
|
||||
Encode::define_alias( "UTF16LE" => "ucs-2le" );
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
129
database/perl/vendor/lib/XML/SAX/PurePerl/XMLDecl.pm
vendored
Normal file
129
database/perl/vendor/lib/XML/SAX/PurePerl/XMLDecl.pm
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
# $Id$
|
||||
|
||||
package XML::SAX::PurePerl;
|
||||
|
||||
use strict;
|
||||
use XML::SAX::PurePerl::Productions qw($S $VersionNum $EncNameStart $EncNameEnd);
|
||||
|
||||
sub XMLDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(5);
|
||||
# warn("Looking for xmldecl in: $data");
|
||||
if ($data =~ /^<\?xml$S/o) {
|
||||
$reader->move_along(5);
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
# get version attribute
|
||||
$self->VersionInfo($reader) ||
|
||||
$self->parser_error("XML Declaration lacks required version attribute, or version attribute does not match XML specification", $reader);
|
||||
|
||||
if (!$self->skip_whitespace($reader)) {
|
||||
my $data = $reader->data(2);
|
||||
$data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
|
||||
$reader->move_along(2);
|
||||
return;
|
||||
}
|
||||
|
||||
if ($self->EncodingDecl($reader)) {
|
||||
if (!$self->skip_whitespace($reader)) {
|
||||
my $data = $reader->data(2);
|
||||
$data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
|
||||
$reader->move_along(2);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
$self->SDDecl($reader);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
my $data = $reader->data(2);
|
||||
$data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
|
||||
$reader->move_along(2);
|
||||
}
|
||||
else {
|
||||
# warn("first 5 bytes: ", join(',', unpack("CCCCC", $data)), "\n");
|
||||
# no xml decl
|
||||
if (!$reader->get_encoding) {
|
||||
$reader->set_encoding("UTF-8");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub VersionInfo {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(11);
|
||||
|
||||
# warn("Looking for version in $data");
|
||||
|
||||
$data =~ /^(version$S*=$S*(["'])($VersionNum)\2)/o or return 0;
|
||||
$reader->move_along(length($1));
|
||||
my $vernum = $3;
|
||||
|
||||
if ($vernum ne "1.0") {
|
||||
$self->parser_error("Only XML version 1.0 supported. Saw: '$vernum'", $reader);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub SDDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(15);
|
||||
|
||||
$data =~ /^(standalone$S*=$S*(["'])(yes|no)\2)/o or return 0;
|
||||
$reader->move_along(length($1));
|
||||
my $yesno = $3;
|
||||
|
||||
if ($yesno eq 'yes') {
|
||||
$self->{standalone} = 1;
|
||||
}
|
||||
else {
|
||||
$self->{standalone} = 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub EncodingDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(12);
|
||||
|
||||
$data =~ /^(encoding$S*=$S*(["'])($EncNameStart$EncNameEnd*)\2)/o or return 0;
|
||||
$reader->move_along(length($1));
|
||||
my $encoding = $3;
|
||||
|
||||
$reader->set_encoding($encoding);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub TextDecl {
|
||||
my ($self, $reader) = @_;
|
||||
|
||||
my $data = $reader->data(6);
|
||||
$data =~ /^<\?xml$S+/ or return;
|
||||
$reader->move_along(5);
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
if ($self->VersionInfo($reader)) {
|
||||
$self->skip_whitespace($reader) ||
|
||||
$self->parser_error("Lack of whitespace after version attribute in text declaration", $reader);
|
||||
}
|
||||
|
||||
$self->EncodingDecl($reader) ||
|
||||
$self->parser_error("Encoding declaration missing from external entity text declaration", $reader);
|
||||
|
||||
$self->skip_whitespace($reader);
|
||||
|
||||
$data = $reader->data(2);
|
||||
$data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
1;
|
||||
Reference in New Issue
Block a user