Initial Commit

This commit is contained in:
Riley Schneider
2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions

View File

@@ -0,0 +1,141 @@
=head1 NAME
XML::LibXML::Attr - XML::LibXML Attribute Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Attribute nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$attr = XML::LibXML::Attr->new($name [,$value]);
$string = $attr->getValue();
$string = $attr->value;
$attr->setValue( $string );
$node = $attr->getOwnerElement();
$attr->setNamespace($nsURI, $prefix);
$bool = $attr->isId;
$string = $attr->serializeContent;
=head1 DESCRIPTION
This is the interface to handle Attributes like ordinary nodes. The naming of
the class relies on the W3C DOM documentation.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$attr = XML::LibXML::Attr->new($name [,$value]);
Class constructor. If you need to work with ISO encoded strings, you should I<<<<<< always >>>>>> use the C<<<<<< createAttribute >>>>>> of L<<<<<< XML::LibXML::Document >>>>>>.
=item getValue
$string = $attr->getValue();
Returns the value stored for the attribute. If undef is returned, the attribute
has no value, which is different from being C<<<<<< not specified >>>>>>.
=item value
$string = $attr->value;
Alias for I<<<<<< getValue() >>>>>>
=item setValue
$attr->setValue( $string );
This is needed to set a new attribute value. If ISO encoded strings are passed
as parameter, the node has to be bound to a document, otherwise the encoding
might be done incorrectly.
=item getOwnerElement
$node = $attr->getOwnerElement();
Returns the node the attribute belongs to. If the attribute is not bound to a
node, undef will be returned. Overwriting the underlying implementation, the I<<<<<< parentNode >>>>>> function will return undef, instead of the owner element.
=item setNamespace
$attr->setNamespace($nsURI, $prefix);
This function tries to bind the attribute to a given namespace. If C<<<<<< $nsURI >>>>>> is undefined or empty, the function discards any previous association of the
attribute with a namespace. If the namespace was not previously declared in the
context of the attribute, this function will fail. In this case you may wish to
call setNamespace() on the ownerElement. If the namespace URI is non-empty and
declared in the context of the attribute, but only with a different (non-empty)
prefix, then the attribute is still bound to the namespace but gets a different
prefix than C<<<<<< $prefix >>>>>>. The function also fails if the prefix is empty but the namespace URI is not
(because unprefixed attributes should by definition belong to no namespace).
This function returns 1 on success, 0 otherwise.
=item isId
$bool = $attr->isId;
Determine whether an attribute is of type ID. For documents with a DTD, this
information is only available if DTD loading/validation has been requested. For
HTML documents parsed with the HTML parser ID detection is done automatically.
In XML documents, all "xml:id" attributes are considered to be of type ID.
=item serializeContent($docencoding)
$string = $attr->serializeContent;
This function is not part of DOM API. It returns attribute content in the form
in which it serializes into XML, that is with all meta-characters properly
quoted and with raw entity references (except for entities expanded during
parse time). Setting the optional $docencoding flag to 1 enforces document
encoding for the output string (which is then passed to Perl as a byte string).
Otherwise the string is passed to Perl as (UTF-8 encoded) characters.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,215 @@
package XML::LibXML::AttributeHash;
use strict;
use warnings;
use Scalar::Util qw//;
use Tie::Hash;
our @ISA = qw/Tie::Hash/;
use vars qw($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
BEGIN
{
    # Compile-time feature flag: __HAS_WEAKEN() yields 1 when
    # Scalar::Util::weaken is defined, 0 otherwise.  Both variants are
    # declared with an empty prototype.
    if (defined(&Scalar::Util::weaken))
    {
        *__HAS_WEAKEN = sub () { 1 };
    }
    else
    {
        *__HAS_WEAKEN = sub () { 0 };
    }
};
# Accessor: returns whatever is stored in slot 0 of the tie object
# (TIEHASH places the element there).
sub element
{
    my ($self) = @_;
    return $self->[0];
}
# Splits a Clark-notation key ("{namespace}local") into (namespace, local).
# A key without a "{...}" part comes back as (undef, key).
sub from_clark
{
    my ($self, $str) = @_;
    # No brace-delimited namespace found: the whole string is the local name.
    return (undef, $str) unless $str =~ m! \{ (.+) \} (.+) !x;
    return ($1, $2);
}
# Builds a Clark-notation key: "{namespace}local" when a namespace is
# defined, otherwise just the local name.
sub to_clark
{
    my ($self, $ns, $local) = @_;
    return $local unless defined $ns;
    return '{' . $ns . '}' . $local;
}
# Returns the sorted list of hash keys: any extra keys passed by the caller
# plus one Clark-notation key per attribute of the element.  Objects that
# are XML::LibXML::Namespace instances are left out.  With no element
# (undef), only the caller-supplied keys are returned.
sub all_keys
{
    my ($self, @keys) = @_;
    my $elem = $self->element;
    my @attrs = defined($elem) ? $elem->attributes : ();
    for my $attr (@attrs)
    {
        next if $attr->isa('XML::LibXML::Namespace');
        push @keys, $self->to_clark($attr->namespaceURI, $attr->localname);
    }
    return sort @keys;
}
# tie() constructor.  The object is a blessed array:
#   [0] the element, [1] undef initially (FIRSTKEY/NEXTKEY keep the key
#   list here), [2] a reference to the extra arguments.
# When the "weaken" argument is true and __HAS_WEAKEN is set, the stored
# element reference is weakened.
sub TIEHASH
{
    my ($class, $element, %args) = @_;
    my $self = bless [$element, undef, \%args], $class;
    Scalar::Util::weaken( $self->[0] )
        if __HAS_WEAKEN and $args{weaken};
    return $self;
}
# tie STORE: sets an attribute on the element.  Keys in Clark notation go
# through setAttributeNS (with the placeholder prefix "xxx"), plain keys
# through setAttribute.  The value is stringified before being passed on.
sub STORE
{
    my ($self, $key, $value) = @_;
    my ($key_ns, $key_local) = $self->from_clark($key);
    my $elem = $self->element;
    return $elem->setAttribute($key_local, "$value") unless defined $key_ns;
    return $elem->setAttributeNS($key_ns, "xxx:$key_local", "$value");
}
# tie FETCH: reads an attribute from the element, via getAttributeNS for
# Clark-notation keys and getAttribute for plain keys.
sub FETCH
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);
    return defined $ns
        ? $self->element->getAttributeNS($ns, "$local")
        : $self->element->getAttribute($local);
}
# tie EXISTS: asks the element whether the attribute is present, via
# hasAttributeNS for Clark-notation keys and hasAttribute for plain keys.
sub EXISTS
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);
    my $elem = $self->element;
    return $elem->hasAttribute($local) unless defined $ns;
    return $elem->hasAttributeNS($ns, "$local");
}
# tie DELETE: removes an attribute from the element, via removeAttributeNS
# for Clark-notation keys and removeAttribute for plain keys.  The return
# value is whatever the element method returns.
sub DELETE
{
    my ($self, $key) = @_;
    my ($ns, $local) = $self->from_clark($key);
    return defined $ns
        ? $self->element->removeAttributeNS($ns, "$local")
        : $self->element->removeAttribute($local);
}
# tie FIRSTKEY: starts an iteration over the hash.
#
# Computes the sorted key list, caches it in slot 1 for NEXTKEY, and returns
# the first key.  In list context it returns (key, value) instead.
#
# When there are no keys at all, the cache is cleared and nothing is
# returned (undef in scalar context, the empty list in list context).
# Previously a list-context call went on to FETCH an undefined key.
sub FIRSTKEY
{
    my ($self) = @_;
    my @keys = $self->all_keys;
    if (!@keys)
    {
        # Nothing to iterate over; do not leave an empty cache behind.
        $self->[1] = undef;
        return;
    }
    $self->[1] = \@keys;
    if (wantarray)
    {
        return ($keys[0], $self->FETCH($keys[0]));
    }
    return $keys[0];
}
# tie NEXTKEY: continues an iteration started by FIRSTKEY.
#
# Returns the first key in the sorted key list that is string-greater than
# $lastkey, or (key, value) in list context.  When no such key exists the
# cached key list in slot 1 is dropped and nothing is returned.
sub NEXTKEY
{
    my ($self, $lastkey) = @_;
    # Use the key list cached by FIRSTKEY when there is one; otherwise
    # recompute it from the element.
    my @keys = defined $self->[1] ? @{ $self->[1] } : $self->all_keys;
    my $found;
    foreach my $k (@keys)
    {
        next unless $k gt $lastkey;
        # Assign and leave the loop as two separate statements.  The former
        # "$found = $k and last" only left the loop when $k was a true
        # value, so a false key such as "0" was overwritten by a later key.
        $found = $k;
        last;
    }
    if (!defined $found)
    {
        $self->[1] = undef;
        return;
    }
    if (wantarray)
    {
        return ($found, $self->FETCH($found));
    }
    return $found;
}
# tie SCALAR: the tied hash in scalar context yields the element itself.
sub SCALAR
{
    return $_[0]->element;
}
# tie CLEAR: deletes every key reported by all_keys, one DELETE call per
# key, and returns the tie object.
sub CLEAR
{
    my ($self) = @_;
    my @pending = $self->all_keys;
    while (@pending)
    {
        $self->DELETE(shift @pending);
    }
    return $self;
}
__PACKAGE__
__END__
=head1 NAME
XML::LibXML::AttributeHash - tie an XML::LibXML::Element to a hash to access its attributes
=head1 SYNOPSIS
tie my %hash, 'XML::LibXML::AttributeHash', $element;
$hash{'href'} = 'http://example.com/';
print $element->getAttribute('href') . "\n";
=head1 DESCRIPTION
This class allows an element's attributes to be accessed as if they were a
plain old Perl hash. Attribute names become hash keys. Namespaced attributes
are keyed using Clark notation.
my $XLINK = 'http://www.w3.org/1999/xlink';
tie my %hash, 'XML::LibXML::AttributeHash', $element;
$hash{"{$XLINK}href"} = 'http://localhost/';
print $element->getAttributeNS($XLINK, 'href') . "\n";
There is rarely any need to use XML::LibXML::AttributeHash directly. In
general, it is possible to take advantage of XML::LibXML::Element's
overloading. The example in the SYNOPSIS could have been written:
$element->{'href'} = 'http://example.com/';
print $element->getAttribute('href') . "\n";
The tie interface allows the passing of additional arguments to
XML::LibXML::AttributeHash:
tie my %hash, 'XML::LibXML::AttributeHash', $element, %args;
Currently only one argument is supported, the boolean "weaken" which (if
true) indicates that the tied object's reference to the element should be
a weak reference. This is used by XML::LibXML::Element's overloading. The
"weaken" argument is ignored if you don't have a working Scalar::Util::weaken.

View File

@@ -0,0 +1,93 @@
# $Id$
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Boolean;
use XML::LibXML::Number;
use XML::LibXML::Literal;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&value,
'<=>' => \&cmp;
# Constructor: the object is a blessed reference to 1 when $param is true
# and to 0 otherwise.
sub new {
    my ($class, $param) = @_;
    my $val = 0;
    $val = 1 if $param;
    return bless \$val, $class;
}
# Class method: returns a new object holding the value 1.
sub True {
    my ($class) = @_;
    return bless \(my $val = 1), $class;
}
# Class method: returns a new object holding the value 0.
sub False {
    my ($class) = @_;
    return bless \(my $val = 0), $class;
}
# Returns the scalar the object refers to (also used as the '""' overload).
sub value {
    return ${ $_[0] };
}
# Handler for the '<=>' overload: numerically compares the stored value
# with $other.  $swap is true when the operands arrived in reversed order,
# in which case the comparison is performed the other way round.
sub cmp {
    my ($self, $other, $swap) = @_;
    return $swap
        ? $other <=> $$self
        : $$self <=> $other;
}
# Converts to an XML::LibXML::Number built from this object's value.
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new($self->value);
}
# Already a boolean: returns the object itself.
sub to_boolean {
    my ($self) = @_;
    return $self;
}
# Converts to an XML::LibXML::Literal holding "true" or "false".
sub to_literal {
    my ($self) = @_;
    my $text = $self->value ? "true" : "false";
    return XML::LibXML::Literal->new($text);
}
# Returns the value of the literal produced by to_literal.
sub string_value {
    my ($self) = @_;
    my $literal = $self->to_literal;
    return $literal->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Boolean - Boolean true/false values
=head1 DESCRIPTION
XML::LibXML::Boolean objects implement simple boolean true/false objects.
=head1 API
=head2 XML::LibXML::Boolean->True
Creates a new Boolean object with a true value.
=head2 XML::LibXML::Boolean->False
Creates a new Boolean object with a false value.
=head2 value()
Returns true or false.
=head2 to_literal()
Returns the string "true" or "false".
=cut

View File

@@ -0,0 +1,65 @@
=head1 NAME
XML::LibXML::CDATASection - XML::LibXML Class for CDATA Sections
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to CDATA nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::CDATASection->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for CDATA nodes.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::CDATASection->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats the different text node types slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,66 @@
=head1 NAME
XML::LibXML::Comment - XML::LibXML Comment Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Comment nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Comment->new( $content );
=head1 DESCRIPTION
This class provides all functions of L<<<<<< XML::LibXML::Text >>>>>>, but for comment nodes. This can be done, since only the output of the node
types is different, but not the data structure. :-)
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Comment->new( $content );
The constructor is the only provided function for this package. It is required,
because I<<<<<< libxml2 >>>>>> treats text nodes and comment nodes slightly differently.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,204 @@
#-------------------------------------------------------------------------#
# $Id: Common.pm,v 1.5 2003/02/27 18:32:59 phish108 Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
#-------------------------------------------------------------------------#
package XML::LibXML::Common;
#-------------------------------------------------------------------------#
# global blur #
#-------------------------------------------------------------------------#
use strict;
use warnings;
require Exporter;
require DynaLoader;
use vars qw( @ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML qw(:libxml);
#-------------------------------------------------------------------------#
# export information #
#-------------------------------------------------------------------------#
# Export tag groups.  Comments are kept outside the qw() lists on purpose:
# inside qw() a '#' would become part of the word list.
%EXPORT_TAGS = (
# 'all': the W3C-style node type names (including the PI_NODE,
# DOCUMENT_FRAG_NODE and XINCLUDE_* extras) plus the two encoding functions.
all => [qw(
ELEMENT_NODE
ATTRIBUTE_NODE
TEXT_NODE
CDATA_SECTION_NODE
ENTITY_REFERENCE_NODE
ENTITY_NODE
PI_NODE
PROCESSING_INSTRUCTION_NODE
COMMENT_NODE
DOCUMENT_NODE
DOCUMENT_TYPE_NODE
DOCUMENT_FRAG_NODE
DOCUMENT_FRAGMENT_NODE
NOTATION_NODE
HTML_DOCUMENT_NODE
DTD_NODE
ELEMENT_DECLARATION
ATTRIBUTE_DECLARATION
ENTITY_DECLARATION
NAMESPACE_DECLARATION
XINCLUDE_END
XINCLUDE_START
encodeToUTF8
decodeFromUTF8
)],
# 'w3c': the same node type names as 'all', without the encoding functions.
w3c => [qw(
ELEMENT_NODE
ATTRIBUTE_NODE
TEXT_NODE
CDATA_SECTION_NODE
ENTITY_REFERENCE_NODE
ENTITY_NODE
PI_NODE
PROCESSING_INSTRUCTION_NODE
COMMENT_NODE
DOCUMENT_NODE
DOCUMENT_TYPE_NODE
DOCUMENT_FRAG_NODE
DOCUMENT_FRAGMENT_NODE
NOTATION_NODE
HTML_DOCUMENT_NODE
DTD_NODE
ELEMENT_DECLARATION
ATTRIBUTE_DECLARATION
ENTITY_DECLARATION
NAMESPACE_DECLARATION
XINCLUDE_END
XINCLUDE_START
)],
# 'libxml': XML_-prefixed names.  These are not defined in this file;
# presumably they come from "use XML::LibXML qw(:libxml)" above -- confirm.
libxml => [qw(
XML_ELEMENT_NODE
XML_ATTRIBUTE_NODE
XML_TEXT_NODE
XML_CDATA_SECTION_NODE
XML_ENTITY_REF_NODE
XML_ENTITY_NODE
XML_PI_NODE
XML_COMMENT_NODE
XML_DOCUMENT_NODE
XML_DOCUMENT_TYPE_NODE
XML_DOCUMENT_FRAG_NODE
XML_NOTATION_NODE
XML_HTML_DOCUMENT_NODE
XML_DTD_NODE
XML_ELEMENT_DECL
XML_ATTRIBUTE_DECL
XML_ENTITY_DECL
XML_NAMESPACE_DECL
XML_XINCLUDE_END
XML_XINCLUDE_START
)],
# 'gdome': GDOME_-prefixed names, defined as constants further down.
gdome => [qw(
GDOME_ELEMENT_NODE
GDOME_ATTRIBUTE_NODE
GDOME_TEXT_NODE
GDOME_CDATA_SECTION_NODE
GDOME_ENTITY_REF_NODE
GDOME_ENTITY_NODE
GDOME_PI_NODE
GDOME_COMMENT_NODE
GDOME_DOCUMENT_NODE
GDOME_DOCUMENT_TYPE_NODE
GDOME_DOCUMENT_FRAG_NODE
GDOME_NOTATION_NODE
GDOME_HTML_DOCUMENT_NODE
GDOME_DTD_NODE
GDOME_ELEMENT_DECL
GDOME_ATTRIBUTE_DECL
GDOME_ENTITY_DECL
GDOME_NAMESPACE_DECL
GDOME_XINCLUDE_END
GDOME_XINCLUDE_START
)],
# 'encoding': only the two character-set conversion functions.  They are
# not defined in this file either; presumably imported -- confirm.
encoding => [qw(
encodeToUTF8
decodeFromUTF8
)],
);
# Everything that may be imported on request: the union of the 'encoding',
# 'w3c', 'libxml' and 'gdome' tag lists.
@EXPORT_OK = (
@{$EXPORT_TAGS{encoding}},
@{$EXPORT_TAGS{w3c}},
@{$EXPORT_TAGS{libxml}},
@{$EXPORT_TAGS{gdome}},
);
# Exported by default: the encoding functions and the W3C-style names.
@EXPORT = (
@{$EXPORT_TAGS{encoding}},
@{$EXPORT_TAGS{w3c}},
);
#-------------------------------------------------------------------------#
# W3 conform node types #
#-------------------------------------------------------------------------#
# Node type codes 1..18 under their W3C-style names.
use constant ELEMENT_NODE => 1;
use constant ATTRIBUTE_NODE => 2;
use constant TEXT_NODE => 3;
use constant CDATA_SECTION_NODE => 4;
use constant ENTITY_REFERENCE_NODE => 5;
use constant ENTITY_NODE => 6;
use constant PROCESSING_INSTRUCTION_NODE => 7;
use constant COMMENT_NODE => 8;
use constant DOCUMENT_NODE => 9;
use constant DOCUMENT_TYPE_NODE => 10;
use constant DOCUMENT_FRAGMENT_NODE => 11;
use constant NOTATION_NODE => 12;
use constant HTML_DOCUMENT_NODE => 13;
use constant DTD_NODE => 14;
use constant ELEMENT_DECLARATION => 15;
use constant ATTRIBUTE_DECLARATION => 16;
use constant ENTITY_DECLARATION => 17;
use constant NAMESPACE_DECLARATION => 18;
#-------------------------------------------------------------------------#
# some extras for the W3 spec
#-------------------------------------------------------------------------#
# Short aliases: PI_NODE equals PROCESSING_INSTRUCTION_NODE (7) and
# DOCUMENT_FRAG_NODE equals DOCUMENT_FRAGMENT_NODE (11).
use constant PI_NODE => 7;
use constant DOCUMENT_FRAG_NODE => 11;
# NOTE(review): XINCLUDE_END is 19 and XINCLUDE_START is 20 here, while the
# GDOME_ constants in this file use the opposite numbering
# (GDOME_XINCLUDE_START => 19, GDOME_XINCLUDE_END => 20).  One of the two
# pairs is presumably swapped; check against libxml2's xmlElementType before
# changing anything, since these values are exported to callers.
use constant XINCLUDE_END => 19;
use constant XINCLUDE_START => 20;
#-------------------------------------------------------------------------#
# libgdome compat names #
#-------------------------------------------------------------------------#
# GDOME_-prefixed names for the same node type codes.  Values 1..18 match
# the W3C-style constants above one for one.
use constant GDOME_ELEMENT_NODE => 1;
use constant GDOME_ATTRIBUTE_NODE => 2;
use constant GDOME_TEXT_NODE => 3;
use constant GDOME_CDATA_SECTION_NODE => 4;
use constant GDOME_ENTITY_REF_NODE => 5;
use constant GDOME_ENTITY_NODE => 6;
use constant GDOME_PI_NODE => 7;
use constant GDOME_COMMENT_NODE => 8;
use constant GDOME_DOCUMENT_NODE => 9;
use constant GDOME_DOCUMENT_TYPE_NODE => 10;
use constant GDOME_DOCUMENT_FRAG_NODE => 11;
use constant GDOME_NOTATION_NODE => 12;
use constant GDOME_HTML_DOCUMENT_NODE => 13;
use constant GDOME_DTD_NODE => 14;
use constant GDOME_ELEMENT_DECL => 15;
use constant GDOME_ATTRIBUTE_DECL => 16;
use constant GDOME_ENTITY_DECL => 17;
use constant GDOME_NAMESPACE_DECL => 18;
# START is 19 and END is 20 here; the unprefixed XINCLUDE_END (19) and
# XINCLUDE_START (20) constants earlier in this file are numbered the other
# way round.
use constant GDOME_XINCLUDE_START => 19;
use constant GDOME_XINCLUDE_END => 20;
1;
#-------------------------------------------------------------------------#
__END__

View File

@@ -0,0 +1,136 @@
=head1 NAME
XML::LibXML::Common - Constants and Character Encoding Routines
=head1 SYNOPSIS
use XML::LibXML::Common;
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
=head1 DESCRIPTION
XML::LibXML::Common defines constants for all node types and provides interface
to libxml2 charset conversion functions.
Since XML::LibXML uses its own node type definitions, one may want to use
XML::LibXML::Common in its compatibility mode:
=head2 Exporter TAGS
use XML::LibXML::Common qw(:libxml);
C<<<<<< :libxml >>>>>> tag will use the XML::LibXML Compatibility mode, which defines the old 'XML_'
node-type definitions.
use XML::LibXML::Common qw(:gdome);
C<<<<<< :gdome >>>>>> tag will use the XML::GDOME Compatibility mode, which defines the old 'GDOME_'
node-type definitions.
use XML::LibXML::Common qw(:w3c);
This uses the nodetype definition names as specified for DOM.
use XML::LibXML::Common qw(:encoding);
This tag can be used to export only the charset encoding functions of
XML::LibXML::Common.
=head2 Exports
By default the W3 definitions as defined in the DOM specifications and the
encoding functions are exported by XML::LibXML::Common.
=head2 Encoding functions
To encode or decode a string to or from UTF-8, XML::LibXML::Common exports two
functions, which provide an interface to the encoding support in C<<<<<< libxml2 >>>>>>. Which encodings are supported by these functions depends on how C<<<<<< libxml2 >>>>>> was compiled. UTF-16 is always supported and on most installations, ISO
encodings are supported as well.
This interface was useful for older versions of Perl. Since Perl >= 5.8
provides similar functions via the C<<<<<< Encode >>>>>> module, it is probably a good idea to use those instead.
=over 4
=item encodeToUTF8
$encodedstring = encodeToUTF8( $name_of_encoding, $string_to_encode );
The function will convert a byte string from the specified encoding to a UTF-8
encoded character string.
=item decodeFromUTF8
$decodedstring = decodeFromUTF8($name_of_encoding, $string_to_decode );
This function converts a UTF-8 encoded character string to a specified
encoding. Note that the conversion can raise an error if the given string
contains characters that cannot be represented in the target encoding.
=back
Both these functions report their errors on the standard error. If an error
occurs the function will croak(). To catch the error information it is required
to call the encoding function from within an eval block in order to prevent the
entire script from being stopped on encoding error.
=head2 A note on history
Before XML::LibXML 1.70, this class was available as a separate CPAN
distribution, intended to provide functionality shared between XML::LibXML,
XML::GDOME, and possibly other modules. Since there seems to be no progress in
this direction, we decided to merge XML::LibXML::Common 0.13 and XML::LibXML
1.70 to one CPAN distribution.
The merge also naturally eliminates a practical and urgent problem experienced
by many XML::LibXML users on certain platforms, namely mysterious misbehavior
of XML::LibXML occurring if the installed (often pre-packaged) version of
XML::LibXML::Common was compiled against an older version of libxml2 than
XML::LibXML.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,149 @@
=head1 NAME
XML::LibXML::DOM - XML::LibXML DOM Implementation
=head1 DESCRIPTION
XML::LibXML provides a lightweight interface to I<<<<<< modify >>>>>> a node of the document tree generated by the XML::LibXML parser. This interface
follows as far as possible the DOM Level 3 specification. In addition to the
specified functions, XML::LibXML supports some functions that are more handy to
use in the perl environment.
One also has to remember, that XML::LibXML is an interface to libxml2 nodes
which actually reside on the C-Level of XML::LibXML. This means each node is a
reference to a structure which is different from a perl hash or array. The only
way to access these structures' values is through the DOM interface provided by
XML::LibXML. This also means, that one I<<<<<< can't >>>>>> simply inherit an XML::LibXML node and add new member variables as if they were
hash keys.
The DOM interface of XML::LibXML does not intend to implement a full DOM
interface as it is done by XML::GDOME and used for full featured application.
Moreover, it offers a simple way to build or modify documents that are created
by XML::LibXML's parser.
Another target of the XML::LibXML interface is to make the interfaces of
libxml2 available to the perl community. This includes also some workarounds to
some features where libxml2 assumes more control over the C-Level that most
perl users don't have.
One of the most important parts of the XML::LibXML DOM interface is that the
interfaces try to follow the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>) rather strictly. This means the interface functions are named as the DOM
specification says and not what widespread Java interfaces claim to be the
standard. Although there are several functions that have only a singular
interface that conforms to the DOM spec XML::LibXML provides an additional Java
style alias interface.
Moreover, there are some function interfaces left over from early stages of
XML::LibXML for compatibility reasons. These interfaces are for compatibility
reasons I<<<<<< only >>>>>>. They might disappear in one of the future versions of XML::LibXML, so a user
is requested to switch over to the official functions.
=head2 Encodings and XML::LibXML's DOM implementation
See the section on Encodings in the I<<<<<< XML::LibXML >>>>>> manual page.
=head2 Namespaces and XML::LibXML's DOM implementation
XML::LibXML's DOM implementation is limited by the DOM implementation of
libxml2 which treats namespaces slightly differently than required by the DOM
Level 2 specification.
According to the DOM Level 2 specification, namespaces of elements and
attributes should be persistent, and nodes should be permanently bound to
namespace URIs as they get created; it should be possible to manipulate the
special attributes used for declaring XML namespaces just as other attributes
without affecting the namespaces of other nodes. In DOM Level 2, the
application is responsible for creating the special attributes consistently
and/or for correct serialization of the document.
This is both inconvenient, causes problems in serialization of DOM to XML, and
most importantly, seems almost impossible to implement over libxml2.
In libxml2, namespace URI and prefix of a node is provided by a pointer to a
namespace declaration (appearing as a special xmlns attribute in the XML
document). If the prefix or namespace URI of the declaration changes, the
prefix and namespace URI of all nodes that point to it changes as well.
Moreover, in contrast to DOM, a node (element or attribute) can only be bound
to a namespace URI if there is some namespace declaration in the document to
point to.
Therefore current DOM implementation in XML::LibXML tries to treat namespace
declarations in a compromise between reason, common sense, limitations of
libxml2, and the DOM Level 2 specification.
In XML::LibXML, special attributes declaring XML namespaces are often created
automatically, usually when a namespaced node is attached to a document and no
existing declaration of the namespace and prefix is in the scope to be reused.
In this respect, XML::LibXML DOM implementation differs from the DOM Level 2
specification according to which special attributes for declaring the
appropriate XML namespaces should not be added when a node with a namespace
prefix and namespace URI is created.
Namespace declarations are also created when L<<<<<< XML::LibXML::Document >>>>>>'s createElementNS() or createAttributeNS() functions are used. If the
namespace is not declared on the documentElement, the namespace will be locally
declared for the newly created node. In case of Attributes this may look a bit
confusing, since these nodes cannot have namespace declarations themselves. In this
case the namespace is internally applied to the attribute and later declared on
the node the attribute is appended to (if required).
The following example may explain this a bit:
my $doc = XML::LibXML->createDocument;
my $root = $doc->createElementNS( "", "foo" );
$doc->setDocumentElement( $root );
my $attr = $doc->createAttributeNS( "bar", "bar:foo", "test" );
$root->setAttributeNodeNS( $attr );
This piece of code will result in the following document:
<?xml version="1.0"?>
<foo xmlns:bar="bar" bar:foo="test"/>
The namespace is declared on the document element during the
setAttributeNodeNS() call.
Namespaces can be also declared explicitly by the use of XML::LibXML::Element's
setNamespace() function. Since 1.61, they can also be manipulated with
functions setNamespaceDeclPrefix() and setNamespaceDeclURI() (not available in
DOM). Changing an URI or prefix of an existing namespace declaration affects
the namespace URI and prefix of all nodes which point to it (that is the nodes
in its scope).
It is also important to repeat the specification: While working with namespaces
you should use the namespace aware functions instead of the simplified
versions. For example you should I<<<<<< never >>>>>> use setAttribute() but setAttributeNS().
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,216 @@
# $Id: $
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2011 Joachim Zobel
#
package XML::LibXML::Devel;
use strict;
use warnings;
use XML::LibXML;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use 5.008_000;
use parent qw(Exporter);
use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS );
# This allows declaration use XML::LibXML::Devel ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
# The ':all' tag lists every function this module offers for import.  None
# of these subs is defined in the Perl code visible in this file; presumably
# they are provided by the XS side of XML::LibXML -- confirm.
our %EXPORT_TAGS = ( 'all' => [ qw(
node_to_perl
node_from_perl
refcnt_inc
refcnt_dec
refcnt
fix_owner
mem_used
) ] );
# Importable on request only: @EXPORT is declared above but never filled
# here, so nothing is exported by default.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
# Preloaded methods go here.
1;
__END__
=head1 NAME
XML::LibXML::Devel - makes functions from LibXML.xs available
=head1 SYNOPSIS
/**********************************************
* C functions you want to access
*/
xmlNode *return_node();
void receive_node(xmlNode *);
###############################################
# XS Code
void *
xs_return_node
CODE:
RETVAL = return_node();
OUTPUT:
RETVAL
void
xs_receive_node
void *n
CODE:
receive_node(n);
###############################################
# Perl code
use XML::LibXML::Devel;
sub return_node
{
my $raw_node = xs_return_node();
my $node = XML::LibXML::Devel::node_to_perl($raw_node);
XML::LibXML::Devel::refcnt_inc($raw_node);
return $node;
}
sub receive_node
{
my ($node) = @_;
my $raw_node = XML::LibXML::Devel::node_from_perl($node);
xs_receive_node($raw_node);
XML::LibXML::Devel::refcnt_inc($raw_node);
}
=head1 DESCRIPTION
C<XML::LibXML::Devel> makes functions from LibXML.xs available that
are needed to wrap libxml2 nodes in and out of XML::LibXML::Nodes.
This gives cleaner dependencies than using LibXML.so directly.
To XS a library that uses libxml2 nodes the first step is to
do this so that xmlNodePtr is passed as void *. These raw nodes
are then turned into libxml nodes by using these C<Devel> functions.
Be aware that this module is currently rather experimental. The function
names may change if I XS more functions and introduce a reasonable
naming convention.
Be also aware that this module is a great tool to cause segfaults and
introduce memory leaks. It does however provide a partial cure by making
C<xmlMemUsed> available as C<mem_used>.
=head1 FUNCTIONS
=head2 NODE MANAGEMENT
=over 1
=item node_to_perl
node_to_perl($raw_node);
Returns a LibXML::Node object. This has a proxy node with a reference
counter and an owner attached. The raw node will be deleted as soon
as the reference counter reaches zero.
If the C library is keeping a
pointer to the raw node, you need to call refcnt_inc immediately.
You also need to replace xmlFreeNode by a call to refcnt_dec.
=item node_from_perl
node_from_perl($node);
Returns a raw node. This is a void * pointer and you can do nothing
but passing it to functions that treat it as an xmlNodePtr. The
raw node will be freed as soon as its reference counter reaches zero.
If the C library is keeping a
pointer to the raw node, you need to call refcnt_inc immediately.
You also need to replace xmlFreeNode by a call to refcnt_dec.
=item refcnt_inc
refcnt_inc($raw_node);
Increments the raw node's reference counter. The raw node must already
be known to perl to have a reference counter.
=item refcnt_dec
refcnt_dec($raw_node);
Decrements the raw node's reference counter and returns the value it
had before. If the counter becomes zero or less,
this method will free the proxy node holding the reference counter.
If the node is part of a
subtree, refcnt_dec will fix the reference counts and delete
the subtree if it is not required any more.
=item refcnt
refcnt($raw_node);
Returns the value of the reference counter.
=item fix_owner
fix_owner($raw_node, $raw_parent);
This function fixes the reference counts for an entire subtree.
It is very important to fix an entire subtree after node operations
where the documents or the owner node may get changed. This method is
aware of nodes that already belong to a certain owner node.
=back
=head2 MEMORY DEBUGGING
=over 1
=item $ENV{DEBUG_MEMORY}
BEGIN {$ENV{DEBUG_MEMORY} = 1;};
use XML::LibXML;
This turns on libxml2 memory debugging. It must be set before
XML::LibXML is loaded.
=item mem_used
mem_used();
Returns the number of bytes currently allocated.
=back
=head2 EXPORT
None by default.
=head1 SEE ALSO
This was created to support the needs of Apache2::ModXml2. So this
can serve as an example.
=head1 AUTHOR
Joachim Zobel E<lt>jz-2011@heute-morgen.deE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011 by Joachim Zobel
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.
=cut

View File

@@ -0,0 +1,703 @@
=head1 NAME
XML::LibXML::Document - XML::LibXML DOM Document Class
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Document nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$dom = XML::LibXML::Document->new( $version, $encoding );
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
$strURI = $doc->URI();
$doc->setURI($strURI);
$strEncoding = $doc->encoding();
$strEncoding = $doc->actualEncoding();
$doc->setEncoding($new_encoding);
$strVersion = $doc->version();
$doc->standalone
$doc->setStandalone($numvalue);
my $compression = $doc->compression;
$doc->setCompression($ziplevel);
$docstring = $dom->toString($format);
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
$str = $doc->serialize($format);
$state = $doc->toFile($filename, $format);
$state = $doc->toFH($fh, $format);
$str = $document->toStringHTML();
$str = $document->serialize_html();
$bool = $dom->is_valid();
$dom->validate();
$root = $dom->documentElement();
$dom->setDocumentElement( $root );
$element = $dom->createElement( $nodename );
$element = $dom->createElementNS( $namespaceURI, $nodename );
$text = $dom->createTextNode( $content_text );
$comment = $dom->createComment( $comment_text );
$attrnode = $doc->createAttribute($name [,$value]);
$attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
$fragment = $doc->createDocumentFragment();
$cdata = $dom->createCDATASection( $cdata_content );
my $pi = $doc->createProcessingInstruction( $target, $data );
my $entref = $doc->createEntityReference($refname);
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
$document->importNode( $node );
$document->adoptNode( $node );
my $dtd = $doc->externalSubset;
my $dtd = $doc->internalSubset;
$doc->setExternalSubset($dtd);
$doc->setInternalSubset($dtd);
my $dtd = $doc->removeExternalSubset();
my $dtd = $doc->removeInternalSubset();
my @nodelist = $doc->getElementsByTagName($tagname);
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
my @nodelist = $doc->getElementsByLocalName($localname);
my $node = $doc->getElementById($id);
$dom->indexElements();
=head1 DESCRIPTION
The Document Class is in most cases the result of a parsing process. But
sometimes it is necessary to create a Document from scratch. The DOM Document
Class provides functions that conform to the DOM Core naming style.
It inherits all functions from L<<<<<< XML::LibXML::Node >>>>>> as specified in the DOM specification. This enables access to the nodes besides
the root element on document level - a C<<<<<< DTD >>>>>> for example. The support for these nodes is limited at the moment.
While generally nodes are bound to a document in the DOM concept it is
suggested that one should always create a node not bound to any document. There
is no need of really including the node to the document, but once the node is
bound to a document, it is quite safe that all strings have the correct
encoding. If an unbound text node with an ISO encoded string is created (e.g.
with $CLASS->new()), the C<<<<<< toString >>>>>> function may not return the expected result.
To prevent such problems, it is recommended to pass all data to XML::LibXML
methods as character strings (i.e. UTF-8 encoded, with the UTF8 flag on).
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$dom = XML::LibXML::Document->new( $version, $encoding );
alias for createDocument()
=item createDocument
$dom = XML::LibXML::Document->createDocument( $version, $encoding );
The constructor for the document class. As Parameter it takes the version
string and (optionally) the encoding string. Simply calling I<<<<<< createDocument >>>>>>() will create the document:
<?xml version="your version" encoding="your encoding"?>
Both parameters are optional. The default value for I<<<<<< $version >>>>>> is C<<<<<< 1.0 >>>>>>, of course. If the I<<<<<< $encoding >>>>>> parameter is not set, the encoding will be left unset, which means UTF-8 is
implied.
The call of I<<<<<< createDocument >>>>>>() without any parameter will result in the following code:
<?xml version="1.0"?>
Alternatively one can call this constructor directly from the XML::LibXML class
level, to avoid some typing. This will not have any effect on the class
instance, which is always XML::LibXML::Document.
my $document = XML::LibXML->createDocument( "1.0", "UTF-8" );
is therefore a shortcut for
my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" );
=item URI
$strURI = $doc->URI();
Returns the URI (or filename) of the original document. For documents obtained
by parsing a string or a FH without using the URI parsing argument of the
corresponding C<<<<<< parse_* >>>>>> function, the result is a generated string unknown-XYZ where XYZ is some
number; for documents created with the constructor C<<<<<< new >>>>>>, the URI is undefined.
The value can be modified by calling C<<<<<< setURI >>>>>> method on the document node.
=item setURI
$doc->setURI($strURI);
Sets the URI of the document reported by the method URI (see also the URI
argument to the various C<<<<<< parse_* >>>>>> functions).
=item encoding
$strEncoding = $doc->encoding();
returns the encoding string of the document.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->encoding; # prints ISO-8859-15
=item actualEncoding
$strEncoding = $doc->actualEncoding();
returns the encoding in which the XML will be returned by $doc->toString().
This is usually the original encoding of the document as declared in the XML
declaration and returned by $doc->encoding. If the original encoding is not
known (e.g. if created in memory or parsed from a XML without a declared
encoding), 'UTF-8' is returned.
my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
print $doc->actualEncoding; # prints ISO-8859-15
=item setEncoding
$doc->setEncoding($new_encoding);
This method allows one to change the declaration of encoding in the XML
declaration of the document. The value also affects the encoding in which the
document is serialized to XML by $doc->toString(). Use setEncoding() to remove
the encoding declaration.
=item version
$strVersion = $doc->version();
returns the version string of the document
I<<<<<< getVersion() >>>>>> is an alternative form of this function.
=item standalone
$doc->standalone
This function returns the numerical value of a document's XML declaration's
standalone attribute. It returns I<<<<<< 1 >>>>>> if standalone="yes" was found, I<<<<<< 0 >>>>>> if standalone="no" was found and I<<<<<< -1 >>>>>> if standalone was not specified (default on creation).
=item setStandalone
$doc->setStandalone($numvalue);
Through this method it is possible to alter the value of a document's standalone
attribute. Set it to I<<<<<< 1 >>>>>> to set standalone="yes", to I<<<<<< 0 >>>>>> to set standalone="no" or set it to I<<<<<< -1 >>>>>> to remove the standalone attribute from the XML declaration.
=item compression
my $compression = $doc->compression;
libxml2 allows reading of documents directly from gzipped files. In this case
the compression variable is set to the compression level of that file (0-8). If
XML::LibXML parsed a different source or the file wasn't compressed, the
returned value will be I<<<<<< -1 >>>>>>.
=item setCompression
$doc->setCompression($ziplevel);
If one intends to write the document directly to a file, it is possible to set
the compression level for a given document. This level can be in the range from
0 to 8. If XML::LibXML should not try to compress use I<<<<<< -1 >>>>>> (default).
Note that this feature will I<<<<<< only >>>>>> work if libxml2 is compiled with zlib support and toFile() is used for output.
=item toString
$docstring = $dom->toString($format);
I<<<<<< toString >>>>>> is a DOM serializing function, so the DOM Tree is serialized into an XML
string, ready for output.
IMPORTANT: unlike toString for other nodes, on document nodes this function
returns the XML as a byte string in the original encoding of the document (see
the actualEncoding() method)! This means you can simply do:
open my $out_fh, '>', $file;
print {$out_fh} $doc->toString;
regardless of the actual encoding of the document. See the section on encodings
in L<<<<<< XML::LibXML >>>>>> for more details.
The optional I<<<<<< $format >>>>>> parameter sets the indenting of the output. This parameter is expected to be an C<<<<<< integer >>>>>> value, that specifies that indentation should be used. The format parameter can
have three different values if it is used:
If $format is 0, then the document is dumped as it was originally parsed
If $format is 1, libxml2 will add ignorable white spaces, so the nodes content
is easier to read. Existing text nodes will not be altered
If $format is 2 (or higher), libxml2 will act as $format == 1 but it adds a
leading and a trailing line break to each text node.
libxml2 uses a hard-coded indentation of 2 space characters per indentation
level. This value can not be altered on run-time.
=item toStringC14N
$c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item toStringEC14N
$ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
See the documentation in L<<<<<< XML::LibXML::Node >>>>>>.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item toFile
$state = $doc->toFile($filename, $format);
This function is similar to toString(), but it writes the document directly
into a filesystem. This function is very useful, if one needs to store large
documents.
The format parameter has the same behaviour as in toString().
=item toFH
$state = $doc->toFH($fh, $format);
This function is similar to toString(), but it writes the document directly to
a filehandle or a stream. A byte stream in the document encoding is passed to
the file handle. Do NOT apply any C<<<<<< :encoding(...) >>>>>> or C<<<<<< :utf8 >>>>>> PerlIO layer to the filehandle! See the section on encodings in L<<<<<< XML::LibXML >>>>>> for more details.
The format parameter has the same behaviour as in toString().
=item toStringHTML
$str = $document->toStringHTML();
I<<<<<< toStringHTML >>>>>> serializes the tree to a byte string in the document encoding as HTML. With this
method indenting is automatic and managed by libxml2 internally.
=item serialize_html
$str = $document->serialize_html();
An alias for toStringHTML().
=item is_valid
$bool = $dom->is_valid();
Returns either TRUE or FALSE depending on whether the DOM Tree is a valid
Document or not.
You may also pass in a L<<<<<< XML::LibXML::Dtd >>>>>> object, to validate against an external DTD:
if (!$dom->is_valid($dtd)) {
warn("document is not valid!");
}
=item validate
$dom->validate();
This is an exception throwing equivalent of is_valid. If the document is not
valid it will throw an exception containing the error. This allows you much
better error reporting than simply is_valid or not.
Again, you may pass in a DTD object
=item documentElement
$root = $dom->documentElement();
Returns the root element of the Document. A document can have just one root
element to contain the document's data.
Optionally one can use I<<<<<< getDocumentElement >>>>>>.
=item setDocumentElement
$dom->setDocumentElement( $root );
This function enables you to set the root element for a document. The function
supports the import of a node from a different document tree, but does not
support a document fragment as $root.
=item createElement
$element = $dom->createElement( $nodename );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>>.
=item createElementNS
$element = $dom->createElementNS( $namespaceURI, $nodename );
This function creates a new Element Node bound to the DOM with the name C<<<<<< $nodename >>>>>> and placed in the given namespace.
=item createTextNode
$text = $dom->createTextNode( $content_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Text Node >>>>>> bound to the DOM.
=item createComment
$comment = $dom->createComment( $comment_text );
As an equivalent of I<<<<<< createElement >>>>>>, but it creates a I<<<<<< Comment Node >>>>>> bound to the DOM.
=item createAttribute
$attrnode = $doc->createAttribute($name [,$value]);
Creates a new Attribute node.
=item createAttributeNS
$attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
Creates an Attribute bound to a namespace.
=item createDocumentFragment
$fragment = $doc->createDocumentFragment();
This function creates a DocumentFragment.
=item createCDATASection
$cdata = $dom->createCDATASection( $cdata_content );
Similar to createTextNode and createComment, this function creates a
CDataSection bound to the current DOM.
=item createProcessingInstruction
my $pi = $doc->createProcessingInstruction( $target, $data );
create a processing instruction node.
Since this method is quite long one may use its short form I<<<<<< createPI() >>>>>>.
=item createEntityReference
my $entref = $doc->createEntityReference($refname);
If a document has a DTD specified, one can create entity references by using
this function. If one wants to add an entity reference to the document, this
reference has to be created by this function.
An entity reference is unique to a document and cannot be passed to other
documents as other nodes can be passed.
I<<<<<< NOTE: >>>>>> A text content containing something that looks like an entity reference, will
not be expanded to a real entity reference unless it is a predefined entity
my $string = "&foo;";
$some_element->appendText( $string );
print $some_element->textContent; # prints "&amp;foo;"
=item createInternalSubset
$dtd = $document->createInternalSubset( $rootnode, $public, $system);
This function creates and adds an internal subset to the given document.
Because the function automatically adds the DTD to the document there is no
need to add the created node explicitly to the document.
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", undef, "foo.dtd" );
will result in the following XML document:
<?xml version="1.0"?>
<!DOCTYPE foo SYSTEM "foo.dtd">
By setting the public parameter it is possible to set PUBLIC DTDs to a given
document. So
my $document = XML::LibXML::Document->new();
my $dtd = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef );
will cause the following declaration to be created on the document:
<?xml version="1.0"?>
<!DOCTYPE foo PUBLIC "-//FOO//DTD FOO 0.1//EN">
=item createExternalSubset
$dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
This function is similar to C<<<<<< createInternalSubset() >>>>>> but this DTD is considered to be external and is therefore not added to the
document itself. Nevertheless it can be used for validation purposes.
=item importNode
$document->importNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 2 Specification the Node will not be altered or removed
from its original document (C<<<<<< $node-E<gt>cloneNode(1) >>>>>> will get called implicitly).
I<<<<<< NOTE: >>>>>> Don't try to use importNode() to import sub-trees that contain an entity
reference - even if the entity reference is the root node of the sub-tree. This
will cause serious problems to your program. This is a limitation of libxml2
and not of XML::LibXML itself.
=item adoptNode
$document->adoptNode( $node );
If a node is not part of a document, it can be imported to another document. As
specified in DOM Level 3 Specification the Node will not be altered but it will
be removed from its original document.
After a document adopted a node, the node, its attributes and all its
descendants belong to the new document. Because the node does not belong to the
old document, it will be unlinked from its old location first.
I<<<<<< NOTE: >>>>>> Don't try to use adoptNode() to import sub-trees that contain entity references -
even if the entity reference is the root node of the sub-tree. This will cause
serious problems to your program. This is a limitation of libxml2 and not of
XML::LibXML itself.
=item externalSubset
my $dtd = $doc->externalSubset;
If a document has an external subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item internalSubset
my $dtd = $doc->internalSubset;
If a document has an internal subset defined it will be returned by this
function.
I<<<<<< NOTE >>>>>> Dtd nodes are no ordinary nodes in libxml2. The support for these nodes in
XML::LibXML is still limited. In particular one may not want to use common node
functions on doctype declaration nodes!
=item setExternalSubset
$doc->setExternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an external subset of the given document.
=item setInternalSubset
$doc->setInternalSubset($dtd);
I<<<<<< EXPERIMENTAL! >>>>>>
This method sets a DTD node as an internal subset of the given document.
=item removeExternalSubset
my $dtd = $doc->removeExternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an external subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item removeInternalSubset
my $dtd = $doc->removeInternalSubset();
I<<<<<< EXPERIMENTAL! >>>>>>
If a document has an internal subset defined it can be removed from the
document by using this function. The removed dtd node will be returned.
=item getElementsByTagName
my @nodelist = $doc->getElementsByTagName($tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
Implements the DOM Level 2 function
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
my @nodelist = $doc->getElementsByLocalName($localname);
This allows the fetching of all nodes from a given document with the given
Localname.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementById
my $node = $doc->getElementById($id);
Returns the element that has an ID attribute with the given value. If no such
element exists, this returns undef.
Note: the ID of an element may change while manipulating the document. For
documents with a DTD, the information about ID attributes is only available if
DTD loading/validation has been requested. For HTML documents parsed with the
HTML parser ID detection is done automatically. In XML documents, all "xml:id"
attributes are considered to be of type ID. You can test ID-ness of an
attribute node with $attr->isId().
In versions 1.59 and earlier this method was called getElementsById() (plural)
by mistake. Starting from 1.60 this name is maintained as an alias only for
backward compatibility.
=item indexElements
$dom->indexElements();
This function causes libxml2 to stamp all elements in a document with their
document position index which considerably speeds up XPath queries for large
documents. It should only be used with static documents that won't be further
changed by any DOM methods, because once a document is indexed, XPath will
always prefer the index to other methods of determining the document order of
nodes. XPath could therefore return improperly ordered node-lists when applied
on a document that has been changed after being indexed. It is of course
possible to use this method to re-index a modified document before using it
with XPath again. This function is not a part of the DOM specification.
This function returns number of elements indexed, -1 if error occurred, or -2
if this feature is not available in the running libxml2.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,47 @@
=head1 NAME
XML::LibXML::DocumentFragment - XML::LibXML's DOM L2 Document Fragment Implementation
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
This class is a helper class as described in the DOM Level 2 Specification. It
is implemented as a node without name. All adding, inserting or replacing
functions are aware of document fragments now.
As well I<<<<<< all >>>>>> unbound nodes (all nodes that do not belong to any document sub-tree) are
implicit members of document fragments.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,109 @@
=head1 NAME
XML::LibXML::Dtd - XML::LibXML DTD Handling
=head1 SYNOPSIS
use XML::LibXML;
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
$name = $dtd->getName();
$publicId = $dtd->publicId();
$systemId = $dtd->systemId();
=head1 DESCRIPTION
This class holds a DTD. You may parse a DTD from either a string, or from an
external SYSTEM identifier.
No support is available as yet for parsing from a filehandle.
XML::LibXML::Dtd is a sub-class of L<<<<<< XML::LibXML::Node >>>>>>, so all the methods available to nodes (particularly toString()) are available
to Dtd objects.
=head1 METHODS
=over 4
=item new
$dtd = XML::LibXML::Dtd->new($public_id, $system_id);
Parse a DTD from the system identifier, and return a DTD object that you can
pass to $doc->is_valid() or $doc->validate().
my $dtd = XML::LibXML::Dtd->new(
"SOME // Public / ID / 1.0",
"test.dtd"
);
my $doc = XML::LibXML->new->parse_file("test.xml");
$doc->validate($dtd);
=item parse_string
$dtd = XML::LibXML::Dtd->parse_string($dtd_str);
The same as new() above, except you can parse a DTD from a string. Note that
parsing from string may fail if the DTD contains external parameter-entity
references with relative URLs.
=item getName
$name = $dtd->getName();
Returns the name of DTD; i.e., the name immediately following the DOCTYPE
keyword.
=item publicId
$publicId = $dtd->publicId();
Returns the public identifier of the external subset.
=item systemId
$systemId = $dtd->systemId();
Returns the system identifier of the external subset.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,402 @@
=head1 NAME
XML::LibXML::Element - XML::LibXML Class for Element Nodes
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Element nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$node = XML::LibXML::Element->new( $name );
$node->setAttribute( $aname, $avalue );
$node->setAttributeNS( $nsURI, $aname, $avalue );
$avalue = $node->getAttribute( $aname );
$avalue = $node->getAttributeNS( $nsURI, $aname );
$attrnode = $node->getAttributeNode( $aname );
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
$node->removeAttribute( $aname );
$node->removeAttributeNS( $nsURI, $aname );
$boolean = $node->hasAttribute( $aname );
$boolean = $node->hasAttributeNS( $nsURI, $aname );
@nodes = $node->getChildrenByTagName($tagname);
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
@nodes = $node->getChildrenByLocalName($localname);
@nodes = $node->getElementsByTagName($tagname);
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
@nodes = $node->getElementsByLocalName($localname);
$node->appendWellBalancedChunk( $chunk );
$node->appendText( $PCDATA );
$node->appendTextNode( $PCDATA );
$node->appendTextChild( $childname , $PCDATA );
$node->setNamespace( $nsURI , $nsPrefix, $activate );
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$node = XML::LibXML::Element->new( $name );
This function creates a new node unbound to any DOM.
=item setAttribute
$node->setAttribute( $aname, $avalue );
This method sets or replaces the node's attribute C<<<<<< $aname >>>>>> to the value C<<<<<< $avalue >>>>>>
=item setAttributeNS
$node->setAttributeNS( $nsURI, $aname, $avalue );
Namespace-aware version of C<<<<<< setAttribute >>>>>>, where C<<<<<< $nsURI >>>>>> is a namespace URI, C<<<<<< $aname >>>>>> is a qualified name, and C<<<<<< $avalue >>>>>> is the value. The namespace URI may be null (empty or undefined) in order to
create an attribute which has no namespace.
The current implementation differs from DOM in the following aspects
If an attribute with the same local name and namespace URI already exists on
the element, but its prefix differs from the prefix of C<<<<<< $aname >>>>>>, then this function is supposed to change the prefix (regardless of namespace
declarations and possible collisions). However, the current implementation does
rather the opposite. If a prefix is declared for the namespace URI in the scope
of the attribute, then the already declared prefix is used, disregarding the
prefix specified in C<<<<<< $aname >>>>>>. If no prefix is declared for the namespace, the function tries to declare the
prefix specified in C<<<<<< $aname >>>>>> and dies if the prefix is already taken by some other namespace.
According to DOM Level 2 specification, this method can also be used to create
or modify special attributes used for declaring XML namespaces (which belong to
the namespace "http://www.w3.org/2000/xmlns/" and have prefix or name "xmlns").
This should work since version 1.61, but again the implementation differs from
DOM specification in the following: if a declaration of the same namespace
prefix already exists on the element, then changing its value via this method
automatically changes the namespace of all elements and attributes in its
scope. This is because in libxml2 the namespace URI of an element is not static
but is computed from a pointer to a namespace declaration attribute.
=item getAttribute
$avalue = $node->getAttribute( $aname );
If C<<<<<< $node >>>>>> has an attribute with the name C<<<<<< $aname >>>>>>, the value of this attribute will get returned.
=item getAttributeNS
$avalue = $node->getAttributeNS( $nsURI, $aname );
Retrieves an attribute value by local name and namespace URI.
=item getAttributeNode
$attrnode = $node->getAttributeNode( $aname );
Retrieve an attribute node by name. If no attribute with a given name exists, C<<<<<< undef >>>>>> is returned.
=item getAttributeNodeNS
$attrnode = $node->getAttributeNodeNS( $namespaceURI, $aname );
Retrieves an attribute node by local name and namespace URI. If no attribute
with a given localname and namespace exists, C<<<<<< undef >>>>>> is returned.
=item removeAttribute
$node->removeAttribute( $aname );
The method removes the attribute C<<<<<< $aname >>>>>> from the node's attribute list, if the attribute can be found.
=item removeAttributeNS
$node->removeAttributeNS( $nsURI, $aname );
Namespace version of C<<<<<< removeAttribute >>>>>>
=item hasAttribute
$boolean = $node->hasAttribute( $aname );
This function tests if the named attribute is set for the node. If the
attribute is specified, TRUE (1) will be returned, otherwise the return value
is FALSE (0).
=item hasAttributeNS
$boolean = $node->hasAttributeNS( $nsURI, $aname );
namespace version of C<<<<<< hasAttribute >>>>>>
=item getChildrenByTagName
@nodes = $node->getChildrenByTagName($tagname);
The function gives direct access to all child elements of the current node with
a given tagname, where tagname is a qualified name, that is, in case of
namespace usage it may consist of a prefix and local name. This function makes
things a lot easier if one needs to handle big data sets. A special tagname '*'
can be used to match any name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByTagNameNS
@nodes = $node->getChildrenByTagNameNS($nsURI,$tagname);
Namespace version of C<<<<<< getChildrenByTagName >>>>>>. A special nsURI '*' matches any namespace URI, in which case the function
behaves just like C<<<<<< getChildrenByLocalName >>>>>>.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getChildrenByLocalName
@nodes = $node->getChildrenByLocalName($localname);
The function gives direct access to all child elements of the current node with
a given local name. It makes things a lot easier if one needs to handle big
data sets. A special C<<<<<< localname >>>>>> '*' can be used to match any local name.
If this function is called in SCALAR context, it returns the number of elements
found.
=item getElementsByTagName
@nodes = $node->getElementsByTagName($tagname);
This function is part of the spec. It fetches all descendants of a node with a
given tagname, where C<<<<<< tagname >>>>>> is a qualified name, that is, in case of namespace usage it may consist of a
prefix and local name. A special C<<<<<< tagname >>>>>> '*' can be used to match any tag name.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByTagNameNS
@nodes = $node->getElementsByTagNameNS($nsURI,$localname);
Namespace version of C<<<<<< getElementsByTagName >>>>>> as found in the DOM spec. A special C<<<<<< localname >>>>>> '*' can be used to match any local name and C<<<<<< nsURI >>>>>> '*' can be used to match any namespace URI.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item getElementsByLocalName
@nodes = $node->getElementsByLocalName($localname);
This function is not found in the DOM specification. It is a mix of
getElementsByTagName and getElementsByTagNameNS. It will fetch all tags
matching the given local-name. This allows one to select tags with the same
local name across namespace borders.
In SCALAR context this function returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
=item appendWellBalancedChunk
$node->appendWellBalancedChunk( $chunk );
Sometimes it is necessary to append an XML tree that is given as a string to a node. I<<<<<< appendWellBalancedChunk >>>>>> will do the trick for you, but only if the string is C<<<<<< well-balanced >>>>>>.
I<<<<<< Note that appendWellBalancedChunk() is only left for compatibility reasons >>>>>>. Implicitly it uses
my $fragment = $parser->parse_balanced_chunk( $chunk );
$node->appendChild( $fragment );
This form is more explicit and makes it easier to control the flow of a script.
=item appendText
$node->appendText( $PCDATA );
alias for appendTextNode().
=item appendTextNode
$node->appendTextNode( $PCDATA );
This wrapper function lets you add a string directly to an element node.
=item appendTextChild
$node->appendTextChild( $childname , $PCDATA );
Somewhat similar to C<<<<<< appendTextNode >>>>>>: it lets you add a child element that contains only a C<<<<<< text node >>>>>> by directly specifying the element name and the text content.
=item setNamespace
$node->setNamespace( $nsURI , $nsPrefix, $activate );
setNamespace() allows one to apply a namespace to an element. The function
takes three parameters: 1. the namespace URI, which is required and the two
optional values prefix, which is the namespace prefix, as it should be used in
child elements or attributes as well as the additional activate parameter. If
prefix is not given, undefined or empty, this function tries to create a
declaration of the default namespace.
The activate parameter is most useful: If this parameter is set to FALSE (0), a
new namespace declaration is simply added to the element while the element's
namespace itself is not altered. Nevertheless, activate is set to TRUE (1) by
default. In this case the namespace is used as the node's effective namespace.
This means the namespace prefix is added to the node name and if there was a
namespace already active for the node, it will be replaced (but its declaration
is not removed from the document). A new namespace declaration is only created
if necessary (that is, if the element is already in the scope of a namespace
declaration associating the prefix with the namespace URI, then this
declaration is reused).
The following example may clarify this:
my $e1 = $doc->createElement("bar");
$e1->setNamespace("http://foobar.org", "foo")
results in
<foo:bar xmlns:foo="http://foobar.org"/>
while
my $e2 = $doc->createElement("bar");
$e2->setNamespace("http://foobar.org", "foo",0)
results only in
<bar xmlns:foo="http://foobar.org"/>
By using $activate == 0 it is possible to create multiple namespace
declarations on a single element.
The function fails if it is required to create a declaration associating the
prefix with the namespace URI but the element already carries a declaration
with the same prefix but different namespace URI.
=item setNamespaceDeclURI
$node->setNamespaceDeclURI( $nsPrefix, $newURI );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the prefix by which it looks up the namespace
declaration and a new namespace URI which replaces its previous value.
It returns 1 if the namespace declaration was found and changed, 0 otherwise.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace belong to the
new namespace after the change.
If the new URI is undef or empty, the nodes have no namespace and no prefix
after the change. Namespace declarations once nulled in this way do not further
appear in the serialized output (but do remain in the document for internal
integrity of libxml2 data structures).
This function is NOT part of any DOM API.
=item setNamespaceDeclPrefix
$node->setNamespaceDeclPrefix( $oldPrefix, $newPrefix );
EXPERIMENTAL IN 1.61 !
This function directly manipulates an existing namespace declaration on an
element. It takes two parameters: the old prefix by which it looks up the
namespace declaration and a new prefix which is to replace the old one.
The function dies with an error if the element is in the scope of another
declaration whose prefix equals the new prefix, or if the change would
result in a declaration with a non-empty prefix but empty namespace URI.
Otherwise, it returns 1 if the namespace declaration was found and changed and
0 if not found.
All elements and attributes (even those previously unbound from the document)
for which the namespace declaration determines their namespace change their
prefix to the new value.
If the new prefix is undef or empty, the namespace declaration becomes a
declaration of a default namespace. The corresponding nodes drop their
namespace prefix (but remain in the, now default, namespace). In this case the
function fails, if the containing element is in the scope of another default
namespace declaration.
This function is NOT part of any DOM API.
=back
=head1 OVERLOADING
XML::LibXML::Element overloads hash dereferencing to provide access to the
element's attributes. For non-namespaced attributes, the attribute name is the
hash key, and the attribute value is the hash value. For namespaced attributes,
the hash key is qualified with the namespace URI, using Clark notation.
Perl's "tied hash" feature is used, which means that the hash gives you
read-write access to the element's attributes. For more information, see L<<<<<< XML::LibXML::AttributeHash >>>>>>
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,501 @@
# $Id: ErrNo.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::ErrNo;
use strict;
use warnings;
use vars qw($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Error-code constants, one per line, in ascending numeric order.
# This first group (ERR_* / WAR_*, values 0..100) is numbered consecutively;
# WAR_* names are interleaved with ERR_* names in the same sequence.
# The values must stay in sync with the libxml2 header this table was
# derived from, so do not renumber or reorder by hand.
use constant ERR_OK => 0;
use constant ERR_INTERNAL_ERROR => 1;
use constant ERR_NO_MEMORY => 2;
use constant ERR_DOCUMENT_START => 3;
use constant ERR_DOCUMENT_EMPTY => 4;
use constant ERR_DOCUMENT_END => 5;
use constant ERR_INVALID_HEX_CHARREF => 6;
use constant ERR_INVALID_DEC_CHARREF => 7;
use constant ERR_INVALID_CHARREF => 8;
use constant ERR_INVALID_CHAR => 9;
use constant ERR_CHARREF_AT_EOF => 10;
use constant ERR_CHARREF_IN_PROLOG => 11;
use constant ERR_CHARREF_IN_EPILOG => 12;
use constant ERR_CHARREF_IN_DTD => 13;
use constant ERR_ENTITYREF_AT_EOF => 14;
use constant ERR_ENTITYREF_IN_PROLOG => 15;
use constant ERR_ENTITYREF_IN_EPILOG => 16;
use constant ERR_ENTITYREF_IN_DTD => 17;
use constant ERR_PEREF_AT_EOF => 18;
use constant ERR_PEREF_IN_PROLOG => 19;
use constant ERR_PEREF_IN_EPILOG => 20;
use constant ERR_PEREF_IN_INT_SUBSET => 21;
use constant ERR_ENTITYREF_NO_NAME => 22;
use constant ERR_ENTITYREF_SEMICOL_MISSING => 23;
use constant ERR_PEREF_NO_NAME => 24;
use constant ERR_PEREF_SEMICOL_MISSING => 25;
use constant ERR_UNDECLARED_ENTITY => 26;
use constant WAR_UNDECLARED_ENTITY => 27;
use constant ERR_UNPARSED_ENTITY => 28;
use constant ERR_ENTITY_IS_EXTERNAL => 29;
use constant ERR_ENTITY_IS_PARAMETER => 30;
use constant ERR_UNKNOWN_ENCODING => 31;
use constant ERR_UNSUPPORTED_ENCODING => 32;
use constant ERR_STRING_NOT_STARTED => 33;
use constant ERR_STRING_NOT_CLOSED => 34;
use constant ERR_NS_DECL_ERROR => 35;
use constant ERR_ENTITY_NOT_STARTED => 36;
use constant ERR_ENTITY_NOT_FINISHED => 37;
use constant ERR_LT_IN_ATTRIBUTE => 38;
use constant ERR_ATTRIBUTE_NOT_STARTED => 39;
use constant ERR_ATTRIBUTE_NOT_FINISHED => 40;
use constant ERR_ATTRIBUTE_WITHOUT_VALUE => 41;
use constant ERR_ATTRIBUTE_REDEFINED => 42;
use constant ERR_LITERAL_NOT_STARTED => 43;
use constant ERR_LITERAL_NOT_FINISHED => 44;
use constant ERR_COMMENT_NOT_FINISHED => 45;
use constant ERR_PI_NOT_STARTED => 46;
use constant ERR_PI_NOT_FINISHED => 47;
use constant ERR_NOTATION_NOT_STARTED => 48;
use constant ERR_NOTATION_NOT_FINISHED => 49;
use constant ERR_ATTLIST_NOT_STARTED => 50;
use constant ERR_ATTLIST_NOT_FINISHED => 51;
use constant ERR_MIXED_NOT_STARTED => 52;
use constant ERR_MIXED_NOT_FINISHED => 53;
use constant ERR_ELEMCONTENT_NOT_STARTED => 54;
use constant ERR_ELEMCONTENT_NOT_FINISHED => 55;
use constant ERR_XMLDECL_NOT_STARTED => 56;
use constant ERR_XMLDECL_NOT_FINISHED => 57;
use constant ERR_CONDSEC_NOT_STARTED => 58;
use constant ERR_CONDSEC_NOT_FINISHED => 59;
use constant ERR_EXT_SUBSET_NOT_FINISHED => 60;
use constant ERR_DOCTYPE_NOT_FINISHED => 61;
use constant ERR_MISPLACED_CDATA_END => 62;
use constant ERR_CDATA_NOT_FINISHED => 63;
use constant ERR_RESERVED_XML_NAME => 64;
use constant ERR_SPACE_REQUIRED => 65;
use constant ERR_SEPARATOR_REQUIRED => 66;
use constant ERR_NMTOKEN_REQUIRED => 67;
use constant ERR_NAME_REQUIRED => 68;
use constant ERR_PCDATA_REQUIRED => 69;
use constant ERR_URI_REQUIRED => 70;
use constant ERR_PUBID_REQUIRED => 71;
use constant ERR_LT_REQUIRED => 72;
use constant ERR_GT_REQUIRED => 73;
use constant ERR_LTSLASH_REQUIRED => 74;
use constant ERR_EQUAL_REQUIRED => 75;
use constant ERR_TAG_NAME_MISMATCH => 76;
use constant ERR_TAG_NOT_FINISHED => 77;
use constant ERR_STANDALONE_VALUE => 78;
use constant ERR_ENCODING_NAME => 79;
use constant ERR_HYPHEN_IN_COMMENT => 80;
use constant ERR_INVALID_ENCODING => 81;
use constant ERR_EXT_ENTITY_STANDALONE => 82;
use constant ERR_CONDSEC_INVALID => 83;
use constant ERR_VALUE_REQUIRED => 84;
use constant ERR_NOT_WELL_BALANCED => 85;
use constant ERR_EXTRA_CONTENT => 86;
use constant ERR_ENTITY_CHAR_ERROR => 87;
use constant ERR_ENTITY_PE_INTERNAL => 88;
use constant ERR_ENTITY_LOOP => 89;
use constant ERR_ENTITY_BOUNDARY => 90;
use constant ERR_INVALID_URI => 91;
use constant ERR_URI_FRAGMENT => 92;
use constant WAR_CATALOG_PI => 93;
use constant ERR_NO_DTD => 94;
use constant ERR_CONDSEC_INVALID_KEYWORD => 95;
use constant ERR_VERSION_MISSING => 96;
use constant WAR_UNKNOWN_VERSION => 97;
use constant WAR_LANG_VALUE => 98;
use constant WAR_NS_URI => 99;
use constant WAR_NS_URI_RELATIVE => 100;
# NS_ERR_* codes: values 200..203.
use constant NS_ERR_XML_NAMESPACE => 200;
use constant NS_ERR_UNDEFINED_NAMESPACE => 201;
use constant NS_ERR_QNAME => 202;
use constant NS_ERR_ATTRIBUTE_REDEFINED => 203;
# DTD_* codes: values 500..537, numbered consecutively.
use constant DTD_ATTRIBUTE_DEFAULT => 500;
use constant DTD_ATTRIBUTE_REDEFINED => 501;
use constant DTD_ATTRIBUTE_VALUE => 502;
use constant DTD_CONTENT_ERROR => 503;
use constant DTD_CONTENT_MODEL => 504;
use constant DTD_CONTENT_NOT_DETERMINIST => 505;
use constant DTD_DIFFERENT_PREFIX => 506;
use constant DTD_ELEM_DEFAULT_NAMESPACE => 507;
use constant DTD_ELEM_NAMESPACE => 508;
use constant DTD_ELEM_REDEFINED => 509;
use constant DTD_EMPTY_NOTATION => 510;
use constant DTD_ENTITY_TYPE => 511;
use constant DTD_ID_FIXED => 512;
use constant DTD_ID_REDEFINED => 513;
use constant DTD_ID_SUBSET => 514;
use constant DTD_INVALID_CHILD => 515;
use constant DTD_INVALID_DEFAULT => 516;
use constant DTD_LOAD_ERROR => 517;
use constant DTD_MISSING_ATTRIBUTE => 518;
use constant DTD_MIXED_CORRUPT => 519;
use constant DTD_MULTIPLE_ID => 520;
use constant DTD_NO_DOC => 521;
use constant DTD_NO_DTD => 522;
use constant DTD_NO_ELEM_NAME => 523;
use constant DTD_NO_PREFIX => 524;
use constant DTD_NO_ROOT => 525;
use constant DTD_NOTATION_REDEFINED => 526;
use constant DTD_NOTATION_VALUE => 527;
use constant DTD_NOT_EMPTY => 528;
use constant DTD_NOT_PCDATA => 529;
use constant DTD_NOT_STANDALONE => 530;
use constant DTD_ROOT_NAME => 531;
use constant DTD_STANDALONE_WHITE_SPACE => 532;
use constant DTD_UNKNOWN_ATTRIBUTE => 533;
use constant DTD_UNKNOWN_ELEM => 534;
use constant DTD_UNKNOWN_ENTITY => 535;
use constant DTD_UNKNOWN_ID => 536;
use constant DTD_UNKNOWN_NOTATION => 537;
# HTML_* codes: values 800..801.
# NOTE(review): "STRUCURE" looks like a typo, but the name is presumably
# inherited from the corresponding libxml2 identifier; renaming it would
# break existing callers -- confirm before changing.
use constant HTML_STRUCURE_ERROR => 800;
use constant HTML_UNKNOWN_TAG => 801;
# RNGP_* codes: values 1000..1122, numbered consecutively.
use constant RNGP_ANYNAME_ATTR_ANCESTOR => 1000;
use constant RNGP_ATTR_CONFLICT => 1001;
use constant RNGP_ATTRIBUTE_CHILDREN => 1002;
use constant RNGP_ATTRIBUTE_CONTENT => 1003;
use constant RNGP_ATTRIBUTE_EMPTY => 1004;
use constant RNGP_ATTRIBUTE_NOOP => 1005;
use constant RNGP_CHOICE_CONTENT => 1006;
use constant RNGP_CHOICE_EMPTY => 1007;
use constant RNGP_CREATE_FAILURE => 1008;
use constant RNGP_DATA_CONTENT => 1009;
use constant RNGP_DEF_CHOICE_AND_INTERLEAVE => 1010;
use constant RNGP_DEFINE_CREATE_FAILED => 1011;
use constant RNGP_DEFINE_EMPTY => 1012;
use constant RNGP_DEFINE_MISSING => 1013;
use constant RNGP_DEFINE_NAME_MISSING => 1014;
use constant RNGP_ELEM_CONTENT_EMPTY => 1015;
use constant RNGP_ELEM_CONTENT_ERROR => 1016;
use constant RNGP_ELEMENT_EMPTY => 1017;
use constant RNGP_ELEMENT_CONTENT => 1018;
use constant RNGP_ELEMENT_NAME => 1019;
use constant RNGP_ELEMENT_NO_CONTENT => 1020;
use constant RNGP_ELEM_TEXT_CONFLICT => 1021;
use constant RNGP_EMPTY => 1022;
use constant RNGP_EMPTY_CONSTRUCT => 1023;
use constant RNGP_EMPTY_CONTENT => 1024;
use constant RNGP_EMPTY_NOT_EMPTY => 1025;
use constant RNGP_ERROR_TYPE_LIB => 1026;
use constant RNGP_EXCEPT_EMPTY => 1027;
use constant RNGP_EXCEPT_MISSING => 1028;
use constant RNGP_EXCEPT_MULTIPLE => 1029;
use constant RNGP_EXCEPT_NO_CONTENT => 1030;
# NOTE(review): "EMTPY" looks like a typo, but the name is presumably
# inherited from the corresponding libxml2 identifier -- confirm before
# renaming, since callers may already use this spelling.
use constant RNGP_EXTERNALREF_EMTPY => 1031;
use constant RNGP_EXTERNAL_REF_FAILURE => 1032;
use constant RNGP_EXTERNALREF_RECURSE => 1033;
use constant RNGP_FORBIDDEN_ATTRIBUTE => 1034;
use constant RNGP_FOREIGN_ELEMENT => 1035;
use constant RNGP_GRAMMAR_CONTENT => 1036;
use constant RNGP_GRAMMAR_EMPTY => 1037;
use constant RNGP_GRAMMAR_MISSING => 1038;
use constant RNGP_GRAMMAR_NO_START => 1039;
use constant RNGP_GROUP_ATTR_CONFLICT => 1040;
use constant RNGP_HREF_ERROR => 1041;
use constant RNGP_INCLUDE_EMPTY => 1042;
use constant RNGP_INCLUDE_FAILURE => 1043;
use constant RNGP_INCLUDE_RECURSE => 1044;
use constant RNGP_INTERLEAVE_ADD => 1045;
use constant RNGP_INTERLEAVE_CREATE_FAILED => 1046;
use constant RNGP_INTERLEAVE_EMPTY => 1047;
use constant RNGP_INTERLEAVE_NO_CONTENT => 1048;
use constant RNGP_INVALID_DEFINE_NAME => 1049;
use constant RNGP_INVALID_URI => 1050;
use constant RNGP_INVALID_VALUE => 1051;
use constant RNGP_MISSING_HREF => 1052;
use constant RNGP_NAME_MISSING => 1053;
use constant RNGP_NEED_COMBINE => 1054;
use constant RNGP_NOTALLOWED_NOT_EMPTY => 1055;
use constant RNGP_NSNAME_ATTR_ANCESTOR => 1056;
use constant RNGP_NSNAME_NO_NS => 1057;
use constant RNGP_PARAM_FORBIDDEN => 1058;
use constant RNGP_PARAM_NAME_MISSING => 1059;
use constant RNGP_PARENTREF_CREATE_FAILED => 1060;
use constant RNGP_PARENTREF_NAME_INVALID => 1061;
use constant RNGP_PARENTREF_NO_NAME => 1062;
use constant RNGP_PARENTREF_NO_PARENT => 1063;
use constant RNGP_PARENTREF_NOT_EMPTY => 1064;
use constant RNGP_PARSE_ERROR => 1065;
use constant RNGP_PAT_ANYNAME_EXCEPT_ANYNAME => 1066;
use constant RNGP_PAT_ATTR_ATTR => 1067;
use constant RNGP_PAT_ATTR_ELEM => 1068;
use constant RNGP_PAT_DATA_EXCEPT_ATTR => 1069;
use constant RNGP_PAT_DATA_EXCEPT_ELEM => 1070;
use constant RNGP_PAT_DATA_EXCEPT_EMPTY => 1071;
use constant RNGP_PAT_DATA_EXCEPT_GROUP => 1072;
use constant RNGP_PAT_DATA_EXCEPT_INTERLEAVE => 1073;
use constant RNGP_PAT_DATA_EXCEPT_LIST => 1074;
use constant RNGP_PAT_DATA_EXCEPT_ONEMORE => 1075;
use constant RNGP_PAT_DATA_EXCEPT_REF => 1076;
use constant RNGP_PAT_DATA_EXCEPT_TEXT => 1077;
use constant RNGP_PAT_LIST_ATTR => 1078;
use constant RNGP_PAT_LIST_ELEM => 1079;
use constant RNGP_PAT_LIST_INTERLEAVE => 1080;
use constant RNGP_PAT_LIST_LIST => 1081;
use constant RNGP_PAT_LIST_REF => 1082;
use constant RNGP_PAT_LIST_TEXT => 1083;
use constant RNGP_PAT_NSNAME_EXCEPT_ANYNAME => 1084;
use constant RNGP_PAT_NSNAME_EXCEPT_NSNAME => 1085;
use constant RNGP_PAT_ONEMORE_GROUP_ATTR => 1086;
use constant RNGP_PAT_ONEMORE_INTERLEAVE_ATTR => 1087;
use constant RNGP_PAT_START_ATTR => 1088;
use constant RNGP_PAT_START_DATA => 1089;
use constant RNGP_PAT_START_EMPTY => 1090;
use constant RNGP_PAT_START_GROUP => 1091;
use constant RNGP_PAT_START_INTERLEAVE => 1092;
use constant RNGP_PAT_START_LIST => 1093;
use constant RNGP_PAT_START_ONEMORE => 1094;
use constant RNGP_PAT_START_TEXT => 1095;
use constant RNGP_PAT_START_VALUE => 1096;
use constant RNGP_PREFIX_UNDEFINED => 1097;
use constant RNGP_REF_CREATE_FAILED => 1098;
use constant RNGP_REF_CYCLE => 1099;
use constant RNGP_REF_NAME_INVALID => 1100;
use constant RNGP_REF_NO_DEF => 1101;
use constant RNGP_REF_NO_NAME => 1102;
use constant RNGP_REF_NOT_EMPTY => 1103;
use constant RNGP_START_CHOICE_AND_INTERLEAVE => 1104;
use constant RNGP_START_CONTENT => 1105;
use constant RNGP_START_EMPTY => 1106;
use constant RNGP_START_MISSING => 1107;
use constant RNGP_TEXT_EXPECTED => 1108;
use constant RNGP_TEXT_HAS_CHILD => 1109;
use constant RNGP_TYPE_MISSING => 1110;
use constant RNGP_TYPE_NOT_FOUND => 1111;
use constant RNGP_TYPE_VALUE => 1112;
use constant RNGP_UNKNOWN_ATTRIBUTE => 1113;
use constant RNGP_UNKNOWN_COMBINE => 1114;
use constant RNGP_UNKNOWN_CONSTRUCT => 1115;
use constant RNGP_UNKNOWN_TYPE_LIB => 1116;
use constant RNGP_URI_FRAGMENT => 1117;
use constant RNGP_URI_NOT_ABSOLUTE => 1118;
use constant RNGP_VALUE_EMPTY => 1119;
use constant RNGP_VALUE_NO_CONTENT => 1120;
use constant RNGP_XMLNS_NAME => 1121;
use constant RNGP_XML_NS => 1122;
# XPATH_* codes: values 1200..1221. Three XPTR_* names (1216..1218) share
# this numeric range; the remaining XPTR_* codes start at 1900.
use constant XPATH_EXPRESSION_OK => 1200;
use constant XPATH_NUMBER_ERROR => 1201;
use constant XPATH_UNFINISHED_LITERAL_ERROR => 1202;
use constant XPATH_START_LITERAL_ERROR => 1203;
use constant XPATH_VARIABLE_REF_ERROR => 1204;
use constant XPATH_UNDEF_VARIABLE_ERROR => 1205;
use constant XPATH_INVALID_PREDICATE_ERROR => 1206;
use constant XPATH_EXPR_ERROR => 1207;
use constant XPATH_UNCLOSED_ERROR => 1208;
use constant XPATH_UNKNOWN_FUNC_ERROR => 1209;
use constant XPATH_INVALID_OPERAND => 1210;
use constant XPATH_INVALID_TYPE => 1211;
use constant XPATH_INVALID_ARITY => 1212;
use constant XPATH_INVALID_CTXT_SIZE => 1213;
use constant XPATH_INVALID_CTXT_POSITION => 1214;
use constant XPATH_MEMORY_ERROR => 1215;
use constant XPTR_SYNTAX_ERROR => 1216;
use constant XPTR_RESOURCE_ERROR => 1217;
use constant XPTR_SUB_RESOURCE_ERROR => 1218;
use constant XPATH_UNDEF_PREFIX_ERROR => 1219;
use constant XPATH_ENCODING_ERROR => 1220;
use constant XPATH_INVALID_CHAR_ERROR => 1221;
# TREE_* codes: values 1300..1302.
use constant TREE_INVALID_HEX => 1300;
use constant TREE_INVALID_DEC => 1301;
use constant TREE_UNTERMINATED_ENTITY => 1302;
# SAVE_* codes: values 1400..1403.
use constant SAVE_NOT_UTF8 => 1400;
use constant SAVE_CHAR_INVALID => 1401;
use constant SAVE_NO_DOCTYPE => 1402;
use constant SAVE_UNKNOWN_ENCODING => 1403;
# REGEXP_* codes: single value 1450.
use constant REGEXP_COMPILE_ERROR => 1450;
# IO_* codes: values 1500..1556, numbered consecutively. Most names carry an
# errno-style suffix (EACCES, ENOENT, ...).
use constant IO_UNKNOWN => 1500;
use constant IO_EACCES => 1501;
use constant IO_EAGAIN => 1502;
use constant IO_EBADF => 1503;
use constant IO_EBADMSG => 1504;
use constant IO_EBUSY => 1505;
use constant IO_ECANCELED => 1506;
use constant IO_ECHILD => 1507;
use constant IO_EDEADLK => 1508;
use constant IO_EDOM => 1509;
use constant IO_EEXIST => 1510;
use constant IO_EFAULT => 1511;
use constant IO_EFBIG => 1512;
use constant IO_EINPROGRESS => 1513;
use constant IO_EINTR => 1514;
use constant IO_EINVAL => 1515;
use constant IO_EIO => 1516;
use constant IO_EISDIR => 1517;
use constant IO_EMFILE => 1518;
use constant IO_EMLINK => 1519;
use constant IO_EMSGSIZE => 1520;
use constant IO_ENAMETOOLONG => 1521;
use constant IO_ENFILE => 1522;
use constant IO_ENODEV => 1523;
use constant IO_ENOENT => 1524;
use constant IO_ENOEXEC => 1525;
use constant IO_ENOLCK => 1526;
use constant IO_ENOMEM => 1527;
use constant IO_ENOSPC => 1528;
use constant IO_ENOSYS => 1529;
use constant IO_ENOTDIR => 1530;
use constant IO_ENOTEMPTY => 1531;
use constant IO_ENOTSUP => 1532;
use constant IO_ENOTTY => 1533;
use constant IO_ENXIO => 1534;
use constant IO_EPERM => 1535;
use constant IO_EPIPE => 1536;
use constant IO_ERANGE => 1537;
use constant IO_EROFS => 1538;
use constant IO_ESPIPE => 1539;
use constant IO_ESRCH => 1540;
use constant IO_ETIMEDOUT => 1541;
use constant IO_EXDEV => 1542;
use constant IO_NETWORK_ATTEMPT => 1543;
use constant IO_ENCODER => 1544;
use constant IO_FLUSH => 1545;
use constant IO_WRITE => 1546;
use constant IO_NO_INPUT => 1547;
use constant IO_BUFFER_FULL => 1548;
use constant IO_LOAD_ERROR => 1549;
use constant IO_ENOTSOCK => 1550;
use constant IO_EISCONN => 1551;
use constant IO_ECONNREFUSED => 1552;
use constant IO_ENETUNREACH => 1553;
use constant IO_EADDRINUSE => 1554;
use constant IO_EALREADY => 1555;
use constant IO_EAFNOSUPPORT => 1556;
# XINCLUDE_* codes: values 1600..1616.
use constant XINCLUDE_RECURSION => 1600;
use constant XINCLUDE_PARSE_VALUE => 1601;
use constant XINCLUDE_ENTITY_DEF_MISMATCH => 1602;
use constant XINCLUDE_NO_HREF => 1603;
use constant XINCLUDE_NO_FALLBACK => 1604;
use constant XINCLUDE_HREF_URI => 1605;
use constant XINCLUDE_TEXT_FRAGMENT => 1606;
use constant XINCLUDE_TEXT_DOCUMENT => 1607;
use constant XINCLUDE_INVALID_CHAR => 1608;
use constant XINCLUDE_BUILD_FAILED => 1609;
use constant XINCLUDE_UNKNOWN_ENCODING => 1610;
use constant XINCLUDE_MULTIPLE_ROOT => 1611;
use constant XINCLUDE_XPTR_FAILED => 1612;
use constant XINCLUDE_XPTR_RESULT => 1613;
use constant XINCLUDE_INCLUDE_IN_INCLUDE => 1614;
use constant XINCLUDE_FALLBACKS_IN_INCLUDE => 1615;
use constant XINCLUDE_FALLBACK_NOT_IN_INCLUDE => 1616;
# CATALOG_* codes: values 1650..1654.
use constant CATALOG_MISSING_ATTR => 1650;
use constant CATALOG_ENTRY_BROKEN => 1651;
use constant CATALOG_PREFER_VALUE => 1652;
use constant CATALOG_NOT_CATALOG => 1653;
use constant CATALOG_RECURSION => 1654;
# SCHEMAP_* codes, first part: values 1700..1755, numbered consecutively.
use constant SCHEMAP_PREFIX_UNDEFINED => 1700;
use constant SCHEMAP_ATTRFORMDEFAULT_VALUE => 1701;
use constant SCHEMAP_ATTRGRP_NONAME_NOREF => 1702;
use constant SCHEMAP_ATTR_NONAME_NOREF => 1703;
use constant SCHEMAP_COMPLEXTYPE_NONAME_NOREF => 1704;
use constant SCHEMAP_ELEMFORMDEFAULT_VALUE => 1705;
use constant SCHEMAP_ELEM_NONAME_NOREF => 1706;
use constant SCHEMAP_EXTENSION_NO_BASE => 1707;
use constant SCHEMAP_FACET_NO_VALUE => 1708;
use constant SCHEMAP_FAILED_BUILD_IMPORT => 1709;
use constant SCHEMAP_GROUP_NONAME_NOREF => 1710;
use constant SCHEMAP_IMPORT_NAMESPACE_NOT_URI => 1711;
use constant SCHEMAP_IMPORT_REDEFINE_NSNAME => 1712;
use constant SCHEMAP_IMPORT_SCHEMA_NOT_URI => 1713;
use constant SCHEMAP_INVALID_BOOLEAN => 1714;
use constant SCHEMAP_INVALID_ENUM => 1715;
use constant SCHEMAP_INVALID_FACET => 1716;
use constant SCHEMAP_INVALID_FACET_VALUE => 1717;
use constant SCHEMAP_INVALID_MAXOCCURS => 1718;
use constant SCHEMAP_INVALID_MINOCCURS => 1719;
use constant SCHEMAP_INVALID_REF_AND_SUBTYPE => 1720;
use constant SCHEMAP_INVALID_WHITE_SPACE => 1721;
use constant SCHEMAP_NOATTR_NOREF => 1722;
use constant SCHEMAP_NOTATION_NO_NAME => 1723;
use constant SCHEMAP_NOTYPE_NOREF => 1724;
use constant SCHEMAP_REF_AND_SUBTYPE => 1725;
use constant SCHEMAP_RESTRICTION_NONAME_NOREF => 1726;
use constant SCHEMAP_SIMPLETYPE_NONAME => 1727;
use constant SCHEMAP_TYPE_AND_SUBTYPE => 1728;
use constant SCHEMAP_UNKNOWN_ALL_CHILD => 1729;
use constant SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD => 1730;
use constant SCHEMAP_UNKNOWN_ATTR_CHILD => 1731;
use constant SCHEMAP_UNKNOWN_ATTRGRP_CHILD => 1732;
use constant SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP => 1733;
use constant SCHEMAP_UNKNOWN_BASE_TYPE => 1734;
use constant SCHEMAP_UNKNOWN_CHOICE_CHILD => 1735;
use constant SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD => 1736;
use constant SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD => 1737;
use constant SCHEMAP_UNKNOWN_ELEM_CHILD => 1738;
use constant SCHEMAP_UNKNOWN_EXTENSION_CHILD => 1739;
use constant SCHEMAP_UNKNOWN_FACET_CHILD => 1740;
use constant SCHEMAP_UNKNOWN_FACET_TYPE => 1741;
use constant SCHEMAP_UNKNOWN_GROUP_CHILD => 1742;
use constant SCHEMAP_UNKNOWN_IMPORT_CHILD => 1743;
use constant SCHEMAP_UNKNOWN_LIST_CHILD => 1744;
use constant SCHEMAP_UNKNOWN_NOTATION_CHILD => 1745;
use constant SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD => 1746;
use constant SCHEMAP_UNKNOWN_REF => 1747;
use constant SCHEMAP_UNKNOWN_RESTRICTION_CHILD => 1748;
use constant SCHEMAP_UNKNOWN_SCHEMAS_CHILD => 1749;
use constant SCHEMAP_UNKNOWN_SEQUENCE_CHILD => 1750;
use constant SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD => 1751;
use constant SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD => 1752;
use constant SCHEMAP_UNKNOWN_TYPE => 1753;
use constant SCHEMAP_UNKNOWN_UNION_CHILD => 1754;
use constant SCHEMAP_ELEM_DEFAULT_FIXED => 1755;
# SCHEMAP_* codes, tail: values 1756..1766.
# Each name must map to its own consecutive value, mirroring the enum order
# in libxml2's xmlerror.h. SCHEMAP_FAILED_LOAD used to share 1756 with
# SCHEMAP_REGEXP_INVALID, which also left every later constant in this group
# one lower than the code libxml2 actually reports.
use constant SCHEMAP_REGEXP_INVALID => 1756;
use constant SCHEMAP_FAILED_LOAD => 1757;
use constant SCHEMAP_NOTHING_TO_PARSE => 1758;
use constant SCHEMAP_NOROOT => 1759;
use constant SCHEMAP_REDEFINED_GROUP => 1760;
use constant SCHEMAP_REDEFINED_TYPE => 1761;
use constant SCHEMAP_REDEFINED_ELEMENT => 1762;
use constant SCHEMAP_REDEFINED_ATTRGROUP => 1763;
use constant SCHEMAP_REDEFINED_ATTR => 1764;
use constant SCHEMAP_REDEFINED_NOTATION => 1765;
use constant SCHEMAP_FAILED_PARSE => 1766;
# SCHEMAV_* codes: values 1800..1822 as listed here.
# NOTE(review): current libxml2 headers appear to start this range at 1801
# (XML_SCHEMAV_NOROOT = 1801), which would make every value in this group
# one too low -- verify against the xmlerror.h of the targeted libxml2
# version before relying on these for comparisons.
use constant SCHEMAV_NOROOT => 1800;
use constant SCHEMAV_UNDECLAREDELEM => 1801;
use constant SCHEMAV_NOTTOPLEVEL => 1802;
use constant SCHEMAV_MISSING => 1803;
use constant SCHEMAV_WRONGELEM => 1804;
use constant SCHEMAV_NOTYPE => 1805;
use constant SCHEMAV_NOROLLBACK => 1806;
use constant SCHEMAV_ISABSTRACT => 1807;
use constant SCHEMAV_NOTEMPTY => 1808;
use constant SCHEMAV_ELEMCONT => 1809;
use constant SCHEMAV_HAVEDEFAULT => 1810;
use constant SCHEMAV_NOTNILLABLE => 1811;
use constant SCHEMAV_EXTRACONTENT => 1812;
use constant SCHEMAV_INVALIDATTR => 1813;
use constant SCHEMAV_INVALIDELEM => 1814;
use constant SCHEMAV_NOTDETERMINIST => 1815;
use constant SCHEMAV_CONSTRUCT => 1816;
use constant SCHEMAV_INTERNAL => 1817;
use constant SCHEMAV_NOTSIMPLE => 1818;
use constant SCHEMAV_ATTRUNKNOWN => 1819;
use constant SCHEMAV_ATTRINVALID => 1820;
use constant SCHEMAV_VALUE => 1821;
use constant SCHEMAV_FACET => 1822;
# XPTR_* codes: values 1900..1903 (three more XPTR_* names live at
# 1216..1218 inside the XPATH_* range).
use constant XPTR_UNKNOWN_SCHEME => 1900;
use constant XPTR_CHILDSEQ_START => 1901;
use constant XPTR_EVAL_FAILED => 1902;
use constant XPTR_EXTRA_OBJECTS => 1903;
# C14N_* codes: values 1950..1953.
use constant C14N_CREATE_CTXT => 1950;
use constant C14N_REQUIRES_UTF8 => 1951;
use constant C14N_CREATE_STACK => 1952;
use constant C14N_INVALID_NODE => 1953;
# FTP_* codes: values 2000..2002.
use constant FTP_PASV_ANSWER => 2000;
use constant FTP_EPSV_ANSWER => 2001;
use constant FTP_ACCNT => 2002;
# HTTP_* codes: values 2020..2022.
use constant HTTP_URL_SYNTAX => 2020;
use constant HTTP_USE_IP => 2021;
use constant HTTP_UNKNOWN_HOST => 2022;
1;

View File

@@ -0,0 +1,37 @@
=head1 NAME
XML::LibXML::ErrNo - Structured Errors
=head1 DESCRIPTION
This module is based on the xmlerror.h C header file of libxml2. It defines symbolic
constants for all libxml2 error codes. Currently libxml2 uses over 480
different error codes. See also XML::LibXML::Error.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,261 @@
# $Id: Error.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Error;
use strict;
use warnings;
# To avoid a "Deep recursion on subroutine as_string" warning
no warnings 'recursion';
use Encode ();
use vars qw(@error_domains $VERSION $WARNINGS);
use Carp;
# Operator overloading for error objects:
#   '""'  - stringification is delegated to as_string();
#   'eq'  - string equality of the stringified forms of both operands;
#   'cmp' - string ordering of the stringified forms of both operands;
#   fallback => 1 lets Perl derive operators not listed here from the ones
#   that are (see the overload documentation for the exact rules).
use overload
'""' => \&as_string,
'eq' => sub {
("$_[0]" eq "$_[1]")
},
'cmp' => sub {
("$_[0]" cmp "$_[1]")
},
fallback => 1;
$WARNINGS = 0; # 0: suppress, 1: report via warn, 2: report via die
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Error levels, stored in the "level" field of an error object.
use constant XML_ERR_NONE => 0;
use constant XML_ERR_WARNING => 1; # A simple warning
use constant XML_ERR_ERROR => 2; # A recoverable error
use constant XML_ERR_FATAL => 3; # A fatal error
# Error domains, stored in the "domain" field of an error object. The numeric
# value is also the index of the domain's printable name in @error_domains.
use constant XML_ERR_FROM_NONE => 0;
use constant XML_ERR_FROM_PARSER => 1; # The XML parser
use constant XML_ERR_FROM_TREE => 2; # The tree module
use constant XML_ERR_FROM_NAMESPACE => 3; # The XML Namespace module
use constant XML_ERR_FROM_DTD => 4; # The XML DTD validation
use constant XML_ERR_FROM_HTML => 5; # The HTML parser
use constant XML_ERR_FROM_MEMORY => 6; # The memory allocator
use constant XML_ERR_FROM_OUTPUT => 7; # The serialization code
use constant XML_ERR_FROM_IO => 8; # The Input/Output stack
use constant XML_ERR_FROM_FTP => 9; # The FTP module
use constant XML_ERR_FROM_HTTP => 10; # The HTTP module
use constant XML_ERR_FROM_XINCLUDE => 11; # The XInclude processing
use constant XML_ERR_FROM_XPATH => 12; # The XPath module
use constant XML_ERR_FROM_XPOINTER => 13; # The XPointer module
use constant XML_ERR_FROM_REGEXP => 14; # The regular expressions module
use constant XML_ERR_FROM_DATATYPE => 15; # The W3C XML Schemas Datatype module
use constant XML_ERR_FROM_SCHEMASP => 16; # The W3C XML Schemas parser module
use constant XML_ERR_FROM_SCHEMASV => 17; # The W3C XML Schemas validation module
use constant XML_ERR_FROM_RELAXNGP => 18; # The Relax-NG parser module
use constant XML_ERR_FROM_RELAXNGV => 19; # The Relax-NG validator module
use constant XML_ERR_FROM_CATALOG => 20; # The Catalog module
use constant XML_ERR_FROM_C14N => 21; # The Canonicalization module
use constant XML_ERR_FROM_XSLT => 22; # The XSLT engine from libxslt
use constant XML_ERR_FROM_VALID => 23; # The DTD validation module with valid context
use constant XML_ERR_FROM_CHECK => 24; # The error-checking module
use constant XML_ERR_FROM_WRITER => 25; # The xmlwriter module
use constant XML_ERR_FROM_MODULE => 26; # The dynamically-loaded module module
use constant XML_ERR_FROM_I18N => 27; # The module handling character conversion
use constant XML_ERR_FROM_SCHEMATRONV=> 28; # The Schematron validator module
# Printable domain names, indexed by the numeric XML_ERR_FROM_* value
# (29 entries, indices 0..28). domain() looks names up here and falls back
# to "domain_<n>" for indices beyond the end of the list. Indices 4 and 23
# both read "validity".
@error_domains = ("", "parser", "tree", "namespace", "validity",
"HTML parser", "memory", "output", "I/O", "ftp",
"http", "XInclude", "XPath", "xpointer", "regexp",
"Schemas datatype", "Schemas parser", "Schemas validity",
"Relax-NG parser", "Relax-NG validity",
"Catalog", "C14N", "XSLT", "validity", "error-checking",
"xmlwriter", "dynamic loading", "i18n",
"Schematron validity");
# Upper bound on the length of the _prev chain built by _callback_error();
# once a previous error reports this depth, newer errors are not chained on.
my $MAX_ERROR_PREV_DEPTH = 100;
# Generate one read-only accessor per plain hash field of an error object.
# Each accessor simply returns $_[0]{<field>}. "domain" is not in this list
# because it has a hand-written method of its own.
foreach my $accessor_name (
    qw<code _prev level file line nodename message column context>,
    qw<str1 str2 str3 num1 num2 __prev_depth>,
) {
    # Build the closure while strict refs is still in force, so the accessor
    # body itself keeps strict semantics; only the glob assignment below
    # needs symbolic references.
    my $reader = sub { $_[0]{$accessor_name} };
    {
        no strict 'refs';
        *{$accessor_name} = $reader;
    }
}
{
# Constructor.
#
#   $err = CLASS->new($xE);
#
# $xE is either a structured error object (any reference that answers the
# accessor methods called below) or a plain, "flat" message.
#
# For a structured error every field is copied from the object; "context"
# and "column" are stored only when context_and_column() yields a defined
# context. A flat message becomes an error with domain 0, level 2, code -1
# and the message text as given; all other fields are undef.
# In both cases __prev_depth starts at 0.
sub new {
    my ($class, $xE) = @_;

    # Flat error: there is nothing to query, so fill in fixed defaults.
    # (An earlier debugging aid warned "PROBLEM: GOT A FLAT ERROR" here.)
    unless (ref($xE)) {
        my $flat = {
            domain       => 0,
            level        => 2,
            code         => -1,
            message      => $xE,
            file         => undef,
            line         => undef,
            str1         => undef,
            str2         => undef,
            str3         => undef,
            num1         => undef,
            num2         => undef,
            __prev_depth => 0,
        };
        return bless $flat, $class;
    }

    # Structured error: snapshot every field into a plain hash.
    my ($context, $column) = $xE->context_and_column();
    my $fields = {
        domain       => $xE->domain(),
        level        => $xE->level(),
        code         => $xE->code(),
        message      => $xE->message(),
        file         => $xE->file(),
        line         => $xE->line(),
        str1         => $xE->str1(),
        str2         => $xE->str2(),
        str3         => $xE->str3(),
        num1         => $xE->num1(),
        num2         => $xE->num2(),
        __prev_depth => 0,
    };
    if (defined($context)) {
        $fields->{context} = $context;
        $fields->{column}  = $column;
    }
    return bless $fields, $class;
}
# Error-collecting callback.
#
#   $saved = _callback_error($xE, $prev);
#
# Wraps $xE in an XML::LibXML::Error and links it to $prev (the error saved
# so far, if any) through the _prev field. Returns the value the caller
# should keep as the saved error.
#
#  * Warnings are never chained unless $WARNINGS == 2: they are emitted via
#    warn when $WARNINGS is true, and $prev is returned unchanged.
#  * If $prev is an object whose chain depth has reached
#    $MAX_ERROR_PREV_DEPTH, the new error is dropped and $prev is returned.
#  * If $prev is a non-empty plain string, it is first converted into an
#    error object of its own.
sub _callback_error {
    my ($xE, $prev) = @_;
    my $current = XML::LibXML::Error->new($xE);

    if ($current->{level} == XML_ERR_WARNING and $WARNINGS != 2) {
        warn $current if $WARNINGS;
        return $prev;
    }

    # Historical note kept from the original author [CG]: setting
    # $current->{file} to 'string()' when no file is known would make parsed
    # strings easier to recognize, but it breaks old implementations, so it
    # is intentionally not done.

    unless (ref($prev)) {
        # No previous error object: wrap a non-empty flat message, or start
        # a fresh chain.
        my $has_text = defined($prev) && length($prev);
        $current->{_prev} = $has_text ? XML::LibXML::Error->new($prev) : undef;
        return $current;
    }

    my $depth = $prev->__prev_depth();
    return $prev if $depth >= $MAX_ERROR_PREV_DEPTH;

    $current->{_prev}        = $prev;
    $current->{__prev_depth} = $depth + 1;
    return $current;
}
# Immediate error callback: converts $xE into an XML::LibXML::Error object,
# emits a diagnostic dump of it and then throws the object with die().
#   $xE - the reported error (anything XML::LibXML::Error->new accepts)
# Never returns normally.
sub _instant_error_callback {
    my $xE = shift;
    my $terr = XML::LibXML::Error->new($xE);
    # Diagnostics go to STDERR: writing them to STDOUT would get mixed into
    # whatever output the calling program is producing.
    print STDERR "Reporting an instantaneous error ", $terr->dump;
    die $terr;
}
# Emit a previously saved error through warn(); does nothing when no error
# has been saved (i.e. the argument is undefined).
sub _report_warning {
    my $pending = shift;
    warn $pending if defined $pending;
}
# Throw a previously saved error with die(); returns quietly when no error
# has been saved (i.e. the argument is undefined).
sub _report_error {
    my $pending = shift;
    die $pending if defined $pending;
}
}
# backward compatibility
# Old name of num1(), kept for backward compatibility.
sub int1 {
    my ($self) = @_;
    return $self->num1;
}
# Old name of num2(), kept for backward compatibility.
sub int2 {
    my ($self) = @_;
    return $self->num2;
}
# Name of the error domain this error belongs to.
# Returns undef when not called on an object; otherwise the entry of
# @error_domains selected by the numeric domain field, or "domain_N" when the
# number lies beyond the end of that table (newer libxml2 versions may report
# domains this module does not list).
sub domain {
    my $self = shift;
    return undef unless ref($self);
    my $index = $self->{domain};
    if ($index < @error_domains) {
        return $error_domains[$index];
    }
    return "domain_$index";
}
# Serialize the error to a multi-line message.
#
# The text of any earlier error linked through _prev comes first. It is
# followed by a line of the form
#     [where: ]domain level : message
# and, when available, by the offending XPath expression or the parsed
# context with a '^' marker placed under the error position.
# Returns the complete message as a string.
sub as_string {
    my ($self) = @_;
    my $msg = "";
    if (defined($self->{_prev})) {
        $msg = $self->{_prev}->as_string;
    }

    # Start from an empty label so that a missing or unknown level (anything
    # other than warning/error/fatal) does not produce 'uninitialized value'
    # warnings when the message is assembled.
    my $severity = defined($self->{level}) ? $self->{level} : XML_ERR_NONE;
    my $level = "";
    if ($severity == XML_ERR_WARNING) {
        $level = "warning";
    } elsif ($severity == XML_ERR_ERROR ||
             $severity == XML_ERR_FATAL) {
        $level = "error";
    }

    my $where = "";
    if (defined($self->{file})) {
        $where = "$self->{file}:$self->{line}";
    } elsif (($self->{domain} == XML_ERR_FROM_PARSER)
             and
             $self->{line}) {
        $where = "Entity: line $self->{line}";
    }
    if ($self->{nodename}) {
        $where .= ": element ".$self->{nodename};
    }
    $msg .= $where.": " if $where ne "";
    $msg .= $self->domain." ".$level." :";

    # Test definedness rather than truth so that a message of "0" is kept.
    my $str = defined($self->{message}) ? $self->{message} : "";
    chomp($str);
    $msg .= " ".$str."\n";

    if (($self->{domain} == XML_ERR_FROM_XPATH) and
        defined($self->{str1})) {
        # For XPath errors str1 is printed with a caret num1 columns in;
        # an undefined num1 is treated as column 0.
        $msg .= $self->{str1}."\n";
        $msg .= (" " x ($self->{num1} || 0))."^\n";
    } elsif (defined $self->{context}) {
        # If the error relates to character-encoding problems in the context,
        # then doing textual operations on it will spew warnings that
        # XML::LibXML can do nothing to fix. So just disable all such
        # warnings. This has the pleasing benefit of making the test suite
        # run warning-free.
        no warnings 'utf8';
        my $context = Encode::encode('utf8', $self->{context}, Encode::FB_DEFAULT);
        $msg .= $context."\n";
        # Build the marker line: keep tabs, turn everything else before the
        # error column into blanks so the caret lines up with the context.
        $context = substr($context,0,$self->{column});
        $context =~ s/[^\t]/ /g;
        $msg .= $context."^\n";
    }
    return $msg;
}
# Return a Data::Dumper rendering of the error object, with the top-level
# variable in the output named 'error'.
sub dump {
    my $self = shift;
    use Data::Dumper;
    my $dumper = Data::Dumper->new([$self], ['error']);
    return $dumper->Dump;
}
1;

View File

@@ -0,0 +1,264 @@
=head1 NAME
XML::LibXML::Error - Structured Errors
=head1 SYNOPSIS
eval { ... };
if (ref($@)) {
# handle a structured error (XML::LibXML::Error object)
} elsif ($@) {
# error, but not an XML::LibXML::Error object
} else {
# no error
}
$XML::LibXML::Error::WARNINGS=1;
$message = $@->as_string();
print $@->dump();
$error_domain = $@->domain();
$error_code = $@->code();
$error_message = $@->message();
$error_level = $@->level();
$filename = $@->file();
$line = $@->line();
$nodename = $@->nodename();
$error_str1 = $@->str1();
$error_str2 = $@->str2();
$error_str3 = $@->str3();
$error_num1 = $@->num1();
$error_num2 = $@->num2();
$string = $@->context();
$offset = $@->column();
$previous_error = $@->_prev();
=head1 DESCRIPTION
The XML::LibXML::Error class is a tiny frontend to I<<<<<< libxml2 >>>>>>'s structured error support. If XML::LibXML is compiled with structured error
support, all errors reported by libxml2 are transformed to XML::LibXML::Error
objects. These objects automatically serialize to the corresponding error
messages when printed or used in a string operation, but as objects, can also
be used to get a detailed and structured information about the error that
occurred.
Unlike most other XML::LibXML objects, XML::LibXML::Error doesn't wrap an
underlying I<<<<<< libxml2 >>>>>> structure directly, but rather transforms it to a blessed Perl hash reference
containing the individual fields of the structured error information as hash
key-value pairs. Individual items (fields) of a structured error can either be
obtained directly as $@->{field}, or using autoloaded methods such as
$@->field() (where field is the field name). XML::LibXML::Error objects have
the following fields: domain, code, level, file, line, nodename, message, str1,
str2, str3, num1, num2, and _prev (some of them may be undefined).
=over 4
=item $XML::LibXML::Error::WARNINGS
$XML::LibXML::Error::WARNINGS=1;
Traditionally, XML::LibXML was suppressing parser warnings by setting libxml2's
global variable xmlGetWarningsDefaultValue to 0. Since 1.70 we do not change
libxml2's global variables anymore; for backward compatibility, XML::LibXML
suppresses warnings. This variable can be set to 1 to enable reporting of these
warnings via Perl C<<<<<< warn >>>>>> and to 2 to report them via C<<<<<< die >>>>>>.
=item as_string
$message = $@->as_string();
This function serializes an XML::LibXML::Error object to a string containing
the full error message close to the message produced by I<<<<<< libxml2 >>>>>> default error handlers and tools like xmllint. This method is also used to
overload "" operator on XML::LibXML::Error, so it is automatically called
whenever XML::LibXML::Error object is treated as a string (e.g. in print $@).
=item dump
print $@->dump();
This function serializes an XML::LibXML::Error to a string displaying all
fields of the error structure individually on separate lines of the form 'name'
=> 'value'.
=item domain
$error_domain = $@->domain();
Returns string containing information about what part of the library raised the
error. Can be one of: "parser", "tree", "namespace", "validity", "HTML parser",
"memory", "output", "I/O", "ftp", "http", "XInclude", "XPath", "xpointer",
"regexp", "Schemas datatype", "Schemas parser", "Schemas validity", "Relax-NG
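parser", "Relax-NG validity", "Catalog", "C14N", "XSLT", "validity",
"error-checking", "xmlwriter", "dynamic loading", "i18n", "Schematron
validity". For a domain number that is not known to this module, the string
"domain_N" (where N is the number) is returned instead.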
=item code
$error_code = $@->code();
Returns the actual libxml2 error code. The XML::LibXML::ErrNo module defines
constants for individual error codes. Currently libxml2 uses over 480 different
error codes.
=item message
$error_message = $@->message();
Returns a human-readable informative error message.
=item level
$error_level = $@->level();
Returns an integer value describing how severe the error is.
XML::LibXML::Error defines the following constants:
=over 4
=item *
XML_ERR_NONE = 0
=item *
XML_ERR_WARNING = 1 : A simple warning.
=item *
XML_ERR_ERROR = 2 : A recoverable error.
=item *
XML_ERR_FATAL = 3 : A fatal error.
=back
=item file
$filename = $@->file();
Returns the filename of the file being processed while the error occurred.
=item line
$line = $@->line();
The line number, if available.
=item nodename
$nodename = $@->nodename();
Name of the node where error occurred, if available. When this field is
non-empty, libxml2 actually returned a physical pointer to the specified node.
Due to memory management issues, it is very difficult to implement a way to
expose the pointer to the Perl level as a XML::LibXML::Node. For this reason,
XML::LibXML::Error currently only exposes the name of the node.
=item str1
$error_str1 = $@->str1();
Error specific. Extra string information.
=item str2
$error_str2 = $@->str2();
Error specific. Extra string information.
=item str3
$error_str3 = $@->str3();
Error specific. Extra string information.
=item num1
$error_num1 = $@->num1();
Error specific. Extra numeric information.
=item num2
$error_num2 = $@->num2();
In recent libxml2 versions, this value contains a column number of the error or
0 if N/A.
=item context
$string = $@->context();
For parsing errors, this field contains about 80 characters of the XML near the
place where the error occurred. The field C<<<<<< $@-E<gt>column() >>>>>> contains the corresponding offset. Where N/A, the field is undefined.
=item column
$offset = $@->column();
See C<<<<<< $@-E<gt>context() >>>>>> above.
=item _prev
$previous_error = $@->_prev();
This field can possibly hold a reference to another XML::LibXML::Error object
representing an error which occurred just before this error.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,300 @@
=head1 NAME
XML::LibXML::InputCallback - XML::LibXML Class for Input Callbacks
=head1 SYNOPSIS
use XML::LibXML;
=head1 DESCRIPTION
You may get unexpected results if you are trying to load external documents
during libxml2 parsing if the location of the resource is not a HTTP, FTP or
relative location but an absolute path for example. To get around this
limitation, you may add your own input handler to open, read and close
particular types of locations or URI classes. Using these input callback
handlers, you can handle your own custom URI schemes for example.
The input callbacks are used whenever XML::LibXML has to get something other
than externally parsed entities from somewhere. They are implemented using a
callback stack on the Perl layer in analogy to libxml2's native callback stack.
The XML::LibXML::InputCallback class transparently registers the input
callbacks for the libxml2's parser processes.
=head2 How does XML::LibXML::InputCallback work?
The libxml2 library offers a callback implementation as global functions only.
To work-around the troubles resulting in having only global callbacks - for
example, if the same global callback stack is manipulated by different
applications running together in a single Apache Web-server environment -,
XML::LibXML::InputCallback comes with an object-oriented and a function-oriented
part.
Using the function-oriented part the global callback stack of libxml2 can be
manipulated. Those functions can be used as interface to the callbacks on the
C- and XS Layer. At the object-oriented part, operations for working with the
"pseudo-localized" callback stack are implemented. Currently, you can register
and de-register callbacks on the Perl layer and initialize them on a per parser
basis.
=head3 Callback Groups
The libxml2 input callbacks come in groups. One group contains a URI matcher (I<<<<<< match >>>>>>), a data stream constructor (I<<<<<< open >>>>>>), a data stream reader (I<<<<<< read >>>>>>), and a data stream destructor (I<<<<<< close >>>>>>). The callbacks can be manipulated on a per group basis only.
=head3 The Parser Process
The parser process works on an XML data stream, along which, links to other
resources can be embedded. This can be links to external DTDs or XIncludes for
example. Those resources are identified by URIs. The callback implementation of
libxml2 assumes that one callback group can handle a certain amount of URIs and
a certain URI scheme. Per default, callback handlers for I<<<<<< file://* >>>>>>, I<<<<<< file://*.gz >>>>>>, I<<<<<< http://* >>>>>> and I<<<<<< ftp://* >>>>>> are registered.
Callback groups in the callback stack are processed from top to bottom, meaning
that callback groups registered later will be processed before the earlier
registered ones.
While parsing the data stream, the libxml2 parser checks if a registered
callback group will handle a URI - if they will not, the URI will be
interpreted as I<<<<<< file://URI >>>>>>. To handle a URI, the I<<<<<< match >>>>>> callback will have to return '1'. If that happens, the handling of the URI will
be passed to that callback group. Next, the URI will be passed to the I<<<<<< open >>>>>> callback, which should return a I<<<<<< reference >>>>>> to the data stream if it successfully opened the file, '0' otherwise. If
opening the stream was successful, the I<<<<<< read >>>>>> callback will be called repeatedly until it returns an empty string. After the
read callback, the I<<<<<< close >>>>>> callback will be called to close the stream.
=head3 Organisation of callback groups in XML::LibXML::InputCallback
Callback groups are implemented as a stack (Array), each entry holds a
reference to an array of the callbacks. For the libxml2 library, the
XML::LibXML::InputCallback callback implementation appears as one single
callback group. The Perl implementation however allows one to manage different
callback stacks on a per libxml2-parser basis.
=head2 Using XML::LibXML::InputCallback
After object instantiation using the parameter-less constructor, you can
register callback groups.
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ $match_cb1, $open_cb1,
$read_cb1, $close_cb1 ] );
$input_callbacks->register_callbacks([ $match_cb2, $open_cb2,
$read_cb2, $close_cb2 ] );
$input_callbacks->register_callbacks( [ $match_cb3, $open_cb3,
$read_cb3, $close_cb3 ] );
$parser->input_callbacks( $input_callbacks );
$parser->parse_file( $some_xml_file );
=head2 What about the old callback system prior to XML::LibXML::InputCallback?
In XML::LibXML versions prior to 1.59 - i.e. without the
XML::LibXML::InputCallback module - you could define your callbacks either
globally or locally. You still can do that using
XML::LibXML::InputCallback, and in addition to that you can define the
callbacks on a per parser basis!
If you use the old callback interface through global callbacks,
XML::LibXML::InputCallback will treat them with a lower priority than the ones
registered using the new interface. The global callbacks will not override the
callback groups registered using the new interface. Local callbacks are
attached to a specific parser instance, therefore they are treated with highest
priority. If the I<<<<<< match >>>>>> callback of the callback group registered as local variable is identical to one
of the callback groups registered using the new interface, that callback group
will be replaced.
Users of the old callback implementation whose I<<<<<< open >>>>>> callback returned a plain string, will have to adapt their code to return a
reference to that string after upgrading to version >= 1.59. The new callback
system can only deal with the I<<<<<< open >>>>>> callback returning a reference!
=head1 INTERFACE DESCRIPTION
=head2 Global Variables
=over 4
=item $_CUR_CB
Stores the current callback and can be used as shortcut to access the callback
stack.
=item @_GLOBAL_CALLBACKS
Stores all callback groups for the current parser process.
=item @_CB_STACK
Stores the currently used callback group. Used to prevent parser errors when
dealing with nested XML data.
=back
=head2 Global Callbacks
=over 4
=item _callback_match
Implements the interface for the I<<<<<< match >>>>>> callback at C-level and for the selection of the callback group from the
callbacks defined at the Perl-level.
=item _callback_open
Forwards the I<<<<<< open >>>>>> callback from libxml2 to the corresponding callback function at the Perl-level.
=item _callback_read
Forwards the read request to the corresponding callback function at the
Perl-level and returns the result to libxml2.
=item _callback_close
Forwards the I<<<<<< close >>>>>> callback from libxml2 to the corresponding callback function at the
Perl-level.
=back
=head2 Class methods
=over 4
=item new()
A simple constructor.
=item register_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
The four callbacks I<<<<<< have >>>>>> to be given as array reference in the above order I<<<<<< match >>>>>>, I<<<<<< open >>>>>>, I<<<<<< read >>>>>>, I<<<<<< close >>>>>>!
=item unregister_callbacks( [ $match_cb, $open_cb, $read_cb, $close_cb ])
With no arguments given, C<<<<<< unregister_callbacks() >>>>>> will delete the last registered callback group from the stack. If four
callbacks are passed as array reference, the callback group to unregister will
be identified by the I<<<<<< match >>>>>> callback and deleted from the callback stack. Note that if several identical I<<<<<< match >>>>>> callbacks are defined in different callback groups, ALL of them will be deleted
from the stack.
=item init_callbacks( $parser )
Initializes the callback system for the provided parser before starting a
parsing process.
=item cleanup_callbacks()
Resets global variables and the libxml2 callback stack.
=item lib_init_callbacks()
Used internally for callback registration at C-level.
=item lib_cleanup_callbacks()
Used internally for callback resetting at the C-level.
=back
=head1 EXAMPLE CALLBACKS
The following example is a purely fictitious example that uses a
MyScheme::Handler object that responds to methods similar to an IO::Handle.
# Define the four callback functions
sub match_uri {
my $uri = shift;
return $uri =~ /^myscheme:/; # trigger our callback group at a 'myscheme' URIs
}
sub open_uri {
my $uri = shift;
my $handler = MyScheme::Handler->new($uri);
return $handler;
}
# The returned $buffer will be parsed by the libxml2 parser
sub read_uri {
my $handler = shift;
my $length = shift;
my $buffer;
read($handler, $buffer, $length);
return $buffer; # $buffer will be an empty string '' if read() is done
}
# Close the handle associated with the resource.
sub close_uri {
my $handler = shift;
close($handler);
}
# Register them with an instance of XML::LibXML::InputCallback
my $input_callbacks = XML::LibXML::InputCallback->new();
$input_callbacks->register_callbacks([ \&match_uri, \&open_uri,
\&read_uri, \&close_uri ] );
# Register the callback group at a parser instance
$parser->input_callbacks( $input_callbacks );
# $some_xml_file will be parsed using our callbacks
$parser->parse_file( $some_xml_file );
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,112 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Literal;
use XML::LibXML::Boolean;
use XML::LibXML::Number;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use overload
'""' => \&value,
'cmp' => \&cmp;
# Constructor: wraps $string in a blessed scalar reference.
# The value is stored exactly as given (the &quot;/&apos; substitutions that
# once lived here are disabled).
sub new {
    my ($class, $string) = @_;
    return bless \$string, $class;
}
# Return the literal wrapped in single quotes, with every embedded single
# quote written as &apos;. The stored value itself is not modified.
sub as_string {
    my ($self) = @_;
    (my $quoted = ${$self}) =~ s/'/&apos;/g;
    return "'" . $quoted . "'";
}
# Return the literal as a <Literal> XML element followed by a newline.
# The characters &, < and > are escaped so that the result is well-formed
# whatever the literal contains. The stored value itself is not modified.
sub as_xml {
    my $self = shift;
    my $string = $$self;
    # '&' has to be handled first, otherwise the entities produced by the
    # following substitutions would be escaped a second time.
    $string =~ s/&/&amp;/g;
    $string =~ s/</&lt;/g;
    $string =~ s/>/&gt;/g;
    return "<Literal>$string</Literal>\n";
}
# Return the plain string held by this literal (also used for stringification).
sub value {
    my ($self) = @_;
    return ${$self};
}
# String comparison, used for the overloaded 'cmp' operator.
#   $other   - the value to compare against
#   $swapped - true when the operands were given in reverse order
# Returns -1, 0 or 1 like the builtin cmp.
sub cmp {
    my ($self, $other, $swapped) = @_;
    return $swapped ? ($other cmp ${$self}) : (${$self} cmp $other);
}
# A literal evaluates to itself.
sub evaluate {
    my ($self) = @_;
    return $self;
}
# Boolean conversion: a literal is true exactly when its string is non-empty.
# Returns XML::LibXML::Boolean->True or XML::LibXML::Boolean->False.
sub to_boolean {
    my ($self) = @_;
    if (length(${$self}) > 0) {
        return XML::LibXML::Boolean->True;
    }
    return XML::LibXML::Boolean->False;
}
# Numeric conversion: hands the literal's value to XML::LibXML::Number->new.
sub to_number {
    my ($self) = @_;
    return XML::LibXML::Number->new($self->value);
}
# A literal already is a literal: return the object unchanged.
sub to_literal {
    my ($self) = @_;
    return $self;
}
# String value of the literal; simply delegates to value().
sub string_value {
    my ($self) = @_;
    return $self->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Literal - Simple string values.
=head1 DESCRIPTION
In XPath terms a Literal is what we know as a string.
=head1 API
=head2 new($string)
Create a new Literal object with the value in $string. Note that &quot; and
&apos; will be converted to " and ' respectively. That is not part of the XPath
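specification, but I consider it useful. (In this version of the module the two
substitutions that perform this conversion are commented out in C<new()>, so
$string is currently stored unchanged.) Note though that you have to go
to extraordinary lengths in an XML template file (be it XSLT or whatever) to
make use of this: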
<xsl:value-of select="&quot;I'm feeling &amp;quot;sad&amp;quot;&quot;"/>
Which produces a Literal of:
I'm feeling "sad"
=head2 value()
Also overloaded as stringification, simply returns the literal string value.
=head2 cmp($literal)
Returns the equivalent of perl's cmp operator against the given $literal.
=cut

View File

@@ -0,0 +1,161 @@
=head1 NAME
XML::LibXML::Namespace - XML::LibXML Namespace Implementation
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Namespace nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
my $ns = XML::LibXML::Namespace->new($nsURI);
print $ns->nodeName();
print $ns->name();
$localname = $ns->getLocalName();
print $ns->getData();
print $ns->getValue();
print $ns->value();
$known_uri = $ns->getNamespaceURI();
$known_prefix = $ns->getPrefix();
$key = $ns->unique_key();
=head1 DESCRIPTION
Namespace nodes are returned by both $element->findnodes('namespace::foo') and
by $node->getNamespaces().
The namespace node API is not part of any current DOM API, and so it is quite
minimal. It should be noted that namespace nodes are I<<<<<< not >>>>>> a sub class of L<<<<<< XML::LibXML::Node >>>>>>, however Namespace nodes act a lot like attribute nodes, and similarly named
methods will return what you would expect if you treated the namespace node as
an attribute. Note that in order to fix several inconsistencies between the API
and the documentation, the behavior of some functions have been changed in
1.64.
=head1 METHODS
=over 4
=item new
my $ns = XML::LibXML::Namespace->new($nsURI);
Creates a new Namespace node. Note that this is not a 'node' as an attribute or
an element node. Therefore you can't call all L<<<<<< XML::LibXML::Node >>>>>> Functions. All functions available for this node are listed below.
Optionally you can pass the prefix to the namespace constructor. If this second
parameter is omitted you will create a so called default namespace. Note, the
newly created namespace is not bound to any document or node, therefore you
should not expect it to be available in an existing document.
=item declaredURI
Returns the URI for this namespace.
=item declaredPrefix
Returns the prefix for this namespace.
=item nodeName
print $ns->nodeName();
Returns "xmlns:prefix", where prefix is the prefix for this namespace.
=item name
print $ns->name();
Alias for nodeName()
=item getLocalName
$localname = $ns->getLocalName();
Returns the local name of this node as if it were an attribute, that is, the
prefix associated with the namespace.
=item getData
print $ns->getData();
Returns the URI of the namespace, i.e. the value of this node as if it were an
attribute.
=item getValue
print $ns->getValue();
Alias for getData()
=item value
print $ns->value();
Alias for getData()
=item getNamespaceURI
$known_uri = $ns->getNamespaceURI();
Returns the string "http://www.w3.org/2000/xmlns/"
=item getPrefix
$known_prefix = $ns->getPrefix();
Returns the string "xmlns"
=item unique_key
$key = $ns->unique_key();
This method returns a key guaranteed to be unique for this namespace, and to
always be the same value for this namespace. Two namespace objects return the
same key if and only if they have the same prefix and the same URI. The
returned key value is useful as a key in hashes.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,783 @@
=head1 NAME
XML::LibXML::Node - Abstract Base Class of XML::LibXML Nodes
=head1 SYNOPSIS
use XML::LibXML;
$name = $node->nodeName;
$node->setNodeName( $newName );
$bool = $node->isSameNode( $other_node );
$bool = $node->isEqual( $other_node );
$num = $node->unique_key;
$content = $node->nodeValue;
$content = $node->textContent;
$type = $node->nodeType;
$node->unbindNode();
$childnode = $node->removeChild( $childnode );
$oldnode = $node->replaceChild( $newNode, $oldNode );
$node->replaceNode($newNode);
$childnode = $node->appendChild( $childnode );
$childnode = $node->addChild( $childnode );
$node = $parent->addNewChild( $nsURI, $name );
$node->addSibling($newNode);
$newnode =$node->cloneNode( $deep );
$parentnode = $node->parentNode;
$nextnode = $node->nextSibling();
$nextnode = $node->nextNonBlankSibling();
$prevnode = $node->previousSibling();
$prevnode = $node->previousNonBlankSibling();
$boolean = $node->hasChildNodes();
$childnode = $node->firstChild;
$childnode = $node->lastChild;
$documentnode = $node->ownerDocument;
$node = $node->getOwner;
$node->setOwnerDocument( $doc );
$node->insertBefore( $newNode, $refNode );
$node->insertAfter( $newNode, $refNode );
@nodes = $node->findnodes( $xpath_expression );
$result = $node->find( $xpath );
print $node->findvalue( $xpath );
$bool = $node->exists( $xpath_expression );
@childnodes = $node->childNodes();
@childnodes = $node->nonBlankChildNodes();
$xmlstring = $node->toString($format,$docencoding);
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
$c14nstring = $node->toStringC14N_v1_1();
$c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context);
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
$str = $doc->serialize($format);
$localname = $node->localname;
$nameprefix = $node->prefix;
$uri = $node->namespaceURI();
$boolean = $node->hasAttributes();
@attributelist = $node->attributes();
$URI = $node->lookupNamespaceURI( $prefix );
$prefix = $node->lookupNamespacePrefix( $URI );
$node->normalize;
@nslist = $node->getNamespaces;
$node->removeChildNodes();
$strURI = $node->baseURI();
$node->setBaseURI($strURI);
$node->nodePath();
$lineno = $node->line_number();
=head1 DESCRIPTION
XML::LibXML::Node defines functions that are common to all Node Types. An
XML::LibXML::Node should never be created standalone, but as an instance of a
high level class such as XML::LibXML::Element or XML::LibXML::Text. The class
itself should provide only common functionality. In XML::LibXML each node is
part either of a document or a document-fragment. Because of this there is no
node without a parent. This may cause confusion with "unbound" nodes.
=head1 METHODS
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item nodeName
$name = $node->nodeName;
Returns the node's name. This function is aware of namespaces and returns the
full name of the current node (C<<<<<< prefix:localname >>>>>>).
Since 1.62 this function also returns the correct DOM names for node types with
constant names, namely: #text, #cdata-section, #comment, #document,
#document-fragment.
=item setNodeName
$node->setNodeName( $newName );
In very limited situations, it is useful to change a node's name. In the DOM
specification this should throw an error. This Function is aware of namespaces.
=item isSameNode
$bool = $node->isSameNode( $other_node );
returns TRUE (1) if the given nodes refer to the same node structure, otherwise
FALSE (0) is returned.
=item isEqual
$bool = $node->isEqual( $other_node );
deprecated version of isSameNode().
I<<<<<< NOTE >>>>>> isEqual will change behaviour to follow the DOM specification
=item unique_key
$num = $node->unique_key;
This function is not specified for any DOM level. It returns a key guaranteed
to be unique for this node, and to always be the same value for this node. In
other words, two node objects return the same key if and only if isSameNode
indicates that they are the same node.
The returned key value is useful as a key in hashes.
=item nodeValue
$content = $node->nodeValue;
If the node has any content (such as stored in a C<<<<<< text node >>>>>>) it can get requested through this function.
I<<<<<< NOTE: >>>>>> Element Nodes have no content per definition. To get the text value of an
Element use textContent() instead!
=item textContent
$content = $node->textContent;
this function returns the content of all text nodes in the descendants of the
given node as specified in DOM.
=item nodeType
$type = $node->nodeType;
Return a numeric value representing the node type of this node. The module
XML::LibXML by default exports constants for the node types (see the EXPORT
section in the L<<<<<< XML::LibXML >>>>>> manual page).
=item unbindNode
$node->unbindNode();
Unbinds the Node from its siblings and Parent, but not from the Document it
belongs to. If the node is not inserted into the DOM afterwards, it will be
lost after the program terminates. From a low level view, the unbound node is
stripped from the context it is and inserted into a (hidden) document-fragment.
=item removeChild
$childnode = $node->removeChild( $childnode );
This will unbind the Child Node from its parent C<<<<<< $node >>>>>>. The function returns the unbound node. If C<<<<<< $childnode >>>>>> is not a child of the given Node the function will fail.
=item replaceChild
$oldnode = $node->replaceChild( $newNode, $oldNode );
Replaces the C<<<<<< $oldNode >>>>>> with the C<<<<<< $newNode >>>>>>. The C<<<<<< $oldNode >>>>>> will be unbound from the Node. This function differs from the DOM L2
specification, in the case, if the new node is not part of the document, the
node will be imported first.
=item replaceNode
$node->replaceNode($newNode);
This function is very similar to replaceChild(), but it replaces the node
itself rather than a childnode. This is useful if a node found by any XPath
function, should be replaced.
=item appendChild
$childnode = $node->appendChild( $childnode );
The function will add the C<<<<<< $childnode >>>>>> to the end of C<<<<<< $node >>>>>>'s children. The function should fail, if the new childnode is already a child
of C<<<<<< $node >>>>>>. This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first.
=item addChild
$childnode = $node->addChild( $childnode );
As an alternative to appendChild() one can use the addChild() function. This
function is a bit faster, because it avoids all DOM conformity checks.
Therefore this function is quite useful if one builds XML documents in memory
where the order and ownership (C<<<<<< ownerDocument >>>>>>) is assured.
addChild() uses libxml2's own xmlAddChild() function. Thus it has to be used
with extra care: If a text node is added to a node and the node itself or its
last childnode is as well a text node, the node to add will be merged with the
one already available. The current node will be removed from memory after this
action. Because perl is not aware of this action, the perl instance is still
available. XML::LibXML will catch the loss of a node and refuse to run any
function called on that node.
my $t1 = $doc->createTextNode( "foo" );
my $t2 = $doc->createTextNode( "bar" );
$t1->addChild( $t2 ); # is OK
my $val = $t2->nodeValue(); # will fail, script dies
Also addChild() will not check if the added node belongs to the same document
as the node it will be added to. This could lead to inconsistent documents and
in worse cases even to memory violations, if one does not keep track of
this issue.
Although this sounds like a lot of trouble, addChild() is useful if a document
is built from a stream, such as happens sometimes in SAX handlers or filters.
If you are not sure about the source of your nodes, you better stay with
appendChild(), because this function is more user friendly in the sense of
being more error tolerant.
=item addNewChild
$node = $parent->addNewChild( $nsURI, $name );
Similar to C<<<<<< addChild() >>>>>>, this function uses low level libxml2 functionality to provide a faster
interface for DOM building. I<<<<<< addNewChild() >>>>>> uses C<<<<<< xmlNewChild() >>>>>> to create a new node on a given parent element.
addNewChild() has two parameters $nsURI and $name, where $nsURI is an
(optional) namespace URI. $name is the fully qualified element name;
addNewChild() will determine the correct prefix if necessary.
The function returns the newly created node.
This function is very useful for DOM building, where a created node can be
directly associated with its parent. I<<<<<< NOTE >>>>>> this function is not part of the DOM specification and its use will limit your
code to XML::LibXML.
=item addSibling
$node->addSibling($newNode);
addSibling() allows adding an additional node to the end of a nodelist, defined
by the given node.
=item cloneNode
$newnode =$node->cloneNode( $deep );
I<<<<<< cloneNode >>>>>> creates a copy of C<<<<<< $node >>>>>>. When $deep is set to 1 (true) the function will copy all child nodes as well.
If $deep is 0 only the current node will be copied. Note that in case of
element, attributes are copied even if $deep is 0.
Note that the behavior of this function for $deep=0 has changed in 1.62 in
order to be consistent with the DOM spec (in older versions attributes and
namespace information was not copied for elements).
=item parentNode
$parentnode = $node->parentNode;
Returns simply the Parent Node of the current node.
=item nextSibling
$nextnode = $node->nextSibling();
Returns the next sibling, if any.
=item nextNonBlankSibling
$nextnode = $node->nextNonBlankSibling();
Returns the next non-blank sibling if any (a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item previousSibling
$prevnode = $node->previousSibling();
Analogous to I<<<<<< getNextSibling >>>>>> the function returns the previous sibling if any.
=item previousNonBlankSibling
$prevnode = $node->previousNonBlankSibling();
Returns the previous non-blank sibling if any (a node is blank if it is a Text
or CDATA node consisting of whitespace only). This method is not defined by
DOM.
=item hasChildNodes
$boolean = $node->hasChildNodes();
If the current node has child nodes this function returns TRUE (1), otherwise
it returns FALSE (0, not undef).
=item firstChild
$childnode = $node->firstChild;
If a node has child nodes this function will return the first node in the child
list.
=item lastChild
$childnode = $node->lastChild;
If the C<<<<<< $node >>>>>> has child nodes this function returns the last child node.
=item ownerDocument
$documentnode = $node->ownerDocument;
Through this function it is always possible to access the document the current
node is bound to.
=item getOwner
$node = $node->getOwner;
This function returns the node the current node is associated with. In most
cases this will be a document node or a document fragment node.
=item setOwnerDocument
$node->setOwnerDocument( $doc );
This function binds a node to another DOM. This method unbinds the node first,
if it is already bound to another document.
This function is the counterpart of L<<<<<< XML::LibXML::Document >>>>>>'s adoptNode() function. Because of this it has the same limitations with
Entity References as adoptNode().
=item insertBefore
$node->insertBefore( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> before C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
This function differs from the DOM L2 specification, in the case, if the new
node is not part of the document, the node will be imported first,
automatically.
$refNode has to be passed to the function even if it is undefined:
$node->insertBefore( $newNode, undef ); # the same as $node->appendChild( $newNode );
$node->insertBefore( $newNode ); # wrong
Note that the reference node has to be a direct child of the node the function
is called on. Also, $newNode is not allowed to be an ancestor of the new
parent node.
=item insertAfter
$node->insertAfter( $newNode, $refNode );
The method inserts C<<<<<< $newNode >>>>>> after C<<<<<< $refNode >>>>>>. If C<<<<<< $refNode >>>>>> is undefined, the newNode will be set as the new last child of the parent node.
Note, that $refNode has to be passed explicitly even if it is undef.
=item findnodes
@nodes = $node->findnodes( $xpath_expression );
I<<<<<< findnodes >>>>>> evaluates the xpath expression (XPath 1.0) on the current node and returns the
resulting node set as an array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
I<<<<<< NOTE ON NAMESPACES AND XPATH >>>>>>:
A common mistake about XPath is to assume that node tests consisting of an
element name with no prefix match elements in the default namespace. This
assumption is wrong - by XPath specification, such node tests can only match
elements that are in no (i.e. null) namespace.
So, for example, one cannot match the root element of an XHTML document with C<<<<<< $node-E<gt>find('/html') >>>>>> since C<<<<<< '/html' >>>>>> would only match if the root element C<<<<<< E<lt>htmlE<gt> >>>>>> had no namespace, but all XHTML elements belong to the namespace
http://www.w3.org/1999/xhtml. (Note that C<<<<<< xmlns="..." >>>>>> namespace declarations can also be specified in a DTD, which makes the
situation even worse, since the XML document looks as if there was no default
namespace).
There are several possible ways to deal with namespaces in XPath:
=over 4
=item *
The recommended way is to use the L<<<<<< XML::LibXML::XPathContext >>>>>> module to define an explicit context for XPath evaluation, in which a document
independent prefix-to-namespace mapping can be defined. For example:
my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs('x', 'http://www.w3.org/1999/xhtml');
$xpc->find('/x:html',$node);
=item *
Another possibility is to use prefixes declared in the queried document (if
known). If the document declares a prefix for the namespace in question (and
the context node is in the scope of the declaration), C<<<<<< XML::LibXML >>>>>> allows you to use the prefix in the XPath expression, e.g.:
$node->find('/x:html');
=back
See also XML::LibXML::XPathContext->findnodes.
=item find
$result = $node->find( $xpath );
I<<<<<< find >>>>>> evaluates the XPath 1.0 expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath "1 * 3 + 52" results in a L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return an L<<<<<< XML::LibXML::Boolean >>>>>> object, or an L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to "do
the right thing" in different contexts.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->find.
=item findvalue
print $node->findvalue( $xpath );
I<<<<<< findvalue >>>>>> is exactly equivalent to:
$node->find( $xpath )->to_literal;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of XSLT's <xsl:value-of
select="some_xpath"/>.
See also L<<<<<< XML::LibXML::XPathContext >>>>>>->findvalue.
The xpath expression can be passed either as a string, or as a L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $node->exists( $xpath_expression );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item childNodes
@childnodes = $node->childNodes();
I<<<<<< childNodes >>>>>> implements a more intuitive interface to the childnodes of the current node. It
enables you to pass all children directly to a C<<<<<< map >>>>>> or C<<<<<< grep >>>>>>. If this function is called in scalar context, a L<<<<<< XML::LibXML::NodeList >>>>>> object will be returned.
=item nonBlankChildNodes
@childnodes = $node->nonBlankChildNodes();
This is like I<<<<<< childNodes >>>>>>, but returns only non-blank nodes (where a node is blank if it is a Text or
CDATA node consisting of whitespace only). This method is not defined by DOM.
=item toString
$xmlstring = $node->toString($format,$docencoding);
This method is similar to the method C<<<<<< toString >>>>>> of a L<<<<<< XML::LibXML::Document >>>>>> but for a single node. It returns a string consisting of XML serialization of
the given node and all its descendants. Unlike C<<<<<< XML::LibXML::Document::toString >>>>>>, in this case the resulting string is by default a character string (UTF-8
encoded with UTF8 flag on). An optional flag $format controls indentation, as
in C<<<<<< XML::LibXML::Document::toString >>>>>>. If the second optional $docencoding flag is true, the result will be a byte
string in the document encoding (see C<<<<<< XML::LibXML::Document::actualEncoding >>>>>>).
=item toStringC14N
$c14nstring = $node->toStringC14N();
$c14nstring = $node->toStringC14N($with_comments, $xpath_expression , $xpath_context);
The function is similar to toString(). Instead of simply serializing the
document tree, it transforms it as it is specified in the XML-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-c14n >>>>>>). Such transformation is known as canonization.
If $with_comments is 0 or not defined, the result-document will not contain any
comments that exist in the original document. To include comments into the
canonized document, $with_comments has to be set to 1.
The parameter $xpath_expression defines the nodeset of nodes that should be
visible in the resulting document. This can be used to filter out some nodes.
One has to note, that only the nodes that are part of the nodeset, will be
included into the result-document. Their child-nodes will not exist in the
resulting document, unless they are part of the nodeset defined by the xpath
expression.
If $xpath_expression is omitted or empty, toStringC14N() will include all nodes
in the given sub-tree, using the following XPath expressions: with comments
(. | .//node() | .//@* | .//namespace::*)
and without comments
(. | .//node() | .//@* | .//namespace::*)[not(self::comment())]
An optional parameter $xpath_context can be used to pass an L<<<<<< XML::LibXML::XPathContext >>>>>> object defining the context for evaluation of $xpath_expression. This is useful
for mapping namespace prefixes used in the XPath expression to namespace URIs.
Note, however, that $node will be used as the context node for the evaluation,
not the context node of $xpath_context!
=item toStringC14N_v1_1
$c14nstring = $node->toStringC14N_v1_1();
$c14nstring = $node->toStringC14N_v1_1($with_comments, $xpath_expression , $xpath_context);
This function behaves like toStringC14N() except that it uses the
"XML_C14N_1_1" constant for canonicalising using the "C14N 1.1 spec".
=item toStringEC14N
$ec14nstring = $node->toStringEC14N();
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $inclusive_prefix_list);
$ec14nstring = $node->toStringEC14N($with_comments, $xpath_expression, $xpath_context, $inclusive_prefix_list);
The function is similar to toStringC14N() but follows the XML-EXC-C14N
Specification (see L<<<<<< http://www.w3.org/TR/xml-exc-c14n >>>>>>) for exclusive canonization of XML.
The arguments $with_comments, $xpath_expression, $xpath_context are as in
toStringC14N(). An ARRAY reference can be passed as the last argument
$inclusive_prefix_list, listing namespace prefixes that are to be handled in
the manner described by the Canonical XML Recommendation (i.e. preserved in the
output even if the namespace is not used). C.f. the spec for details.
=item serialize
$str = $doc->serialize($format);
An alias for toString(). This function name was added to be more consistent
with libxml2.
=item serialize_c14n
An alias for toStringC14N().
=item serialize_exc_c14n
An alias for toStringEC14N().
=item localname
$localname = $node->localname;
Returns the local name of a tag. This is the part behind the colon.
=item prefix
$nameprefix = $node->prefix;
Returns the prefix of a tag. This is the part before the colon.
=item namespaceURI
$uri = $node->namespaceURI();
returns the URI of the current namespace.
=item hasAttributes
$boolean = $node->hasAttributes();
returns 1 (TRUE) if the current node has any attributes set, otherwise 0
(FALSE) is returned.
=item attributes
@attributelist = $node->attributes();
This function returns all attributes and namespace declarations assigned to the
given node.
Because XML::LibXML does not implement namespace declarations and attributes
the same way, it is required to test what kind of node is handled while
accessing the functions result.
If this function is called in array context the attribute nodes are returned as
an array. In scalar context, the function will return a L<<<<<< XML::LibXML::NamedNodeMap >>>>>> object.
=item lookupNamespaceURI
$URI = $node->lookupNamespaceURI( $prefix );
Find a namespace URI by its prefix starting at the current node.
=item lookupNamespacePrefix
$prefix = $node->lookupNamespacePrefix( $URI );
Find a namespace prefix by its URI starting at the current node.
I<<<<<< NOTE >>>>>> Only the namespace URIs are meant to be unique. The prefix is only document
related. Also the document might have more than a single prefix defined for a
namespace.
=item normalize
$node->normalize;
This function normalizes adjacent text nodes. This function is not as strict as
libxml2's xmlTextMerge() function, since it will not free a node that is still
referenced by the perl layer.
=item getNamespaces
@nslist = $node->getNamespaces;
If a node has any namespaces defined, this function will return these
namespaces. Note, that this will not return all namespaces that are in scope,
but only the ones declared explicitly for that node.
Although getNamespaces is available for all nodes, it only makes sense if used
with element nodes.
=item removeChildNodes
$node->removeChildNodes();
This function is not specified for any DOM level: It removes all childnodes
from a node in a single step. Unlike the libxml2 function itself
(xmlFreeNodeList), this function will not immediately remove the nodes from
memory. This saves one from getting memory violations, if there are nodes still
referred to from the Perl level.
=item baseURI ()
$strURI = $node->baseURI();
Searches for the base URL of the node. The method should work on both XML and
HTML documents even if base mechanisms for these are completely different. It
returns the base as defined in RFC 2396 sections "5.1.1. Base URI within
Document Content" and "5.1.2. Base URI from the Encapsulating Entity". However
it does not return the document base (5.1.3), use method C<<<<<< URI >>>>>> of C<<<<<< XML::LibXML::Document >>>>>> for this.
=item setBaseURI ($strURI)
$node->setBaseURI($strURI);
This method only does something useful for an element node in an XML document.
It sets the xml:base attribute on the node to $strURI, which effectively sets
the base URI of the node to the same value.
Note: For HTML documents this behaves as if the document was XML which may not
be desired, since it does not effectively set the base URI of the node. See RFC
2396 appendix D for an example of how base URI can be specified in HTML.
=item nodePath
$node->nodePath();
This function is not specified for any DOM level: It returns a canonical
structure based XPath for a given node.
=item line_number
$lineno = $node->line_number();
This function returns the line number where the tag was found during parsing.
If a node is added to the document the line number is 0. Problems may occur, if
a node from one document is passed to another one.
IMPORTANT: Due to limitations in the libxml2 library line numbers greater than
65535 will be returned as 65535. Please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details.
Note: line_number() is special to XML::LibXML and not part of the DOM
specification.
If the line_numbers flag of the parser was not activated before parsing,
line_number() will always return 0.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,345 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::NodeList;
use strict;
use warnings;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use XML::LibXML::Number;
use vars qw($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Operator overloading for node lists:
#   ""   - stringification is delegated to to_literal()
#   bool - truth testing is delegated to to_boolean()
#   cmp  - string comparison of the stringified operands; $order is true when
#          Perl has swapped the operands, so they are swapped back here.
use overload
'""' => \&to_literal,
'bool' => \&to_boolean,
'cmp' => sub {
my($aa, $bb, $order) = @_;
return ($order ? ("$bb" cmp "$aa") : ("$aa" cmp "$bb"));
},
;
# Constructor: wraps a copy of the given nodes in a new list object.
sub new {
    my ($class, @nodes) = @_;
    return bless \@nodes, $class;
}
# Constructor taking an array reference. When $reuse is true the passed array
# itself is blessed; otherwise a shallow copy of it is blessed.
sub new_from_ref {
    my $class     = CORE::shift;
    my $array_ref = CORE::shift;
    my $reuse     = CORE::shift;

    my $storage;
    if ($reuse) {
        $storage = $array_ref;
    }
    else {
        $storage = [ @{$array_ref} ];
    }
    return bless $storage, $class;
}
# Removes and returns the last node of the list.
sub pop {
    my ($list) = @_;
    return CORE::pop @{$list};
}
# Appends the given nodes to the end of the list; returns the new list length.
sub push {
    my ($list, @nodes) = @_;
    return CORE::push @{$list}, @nodes;
}
# Appends every node of another node list (anything providing get_nodelist)
# to the end of this list.
sub append {
    my ($list, $other) = @_;
    return CORE::push @{$list}, $other->get_nodelist;
}
# Removes and returns the first node of the list.
sub shift {
    my ($list) = @_;
    return CORE::shift @{$list};
}
# Inserts the given nodes at the front of the list; returns the new length.
sub unshift {
    my ($list, @nodes) = @_;
    return CORE::unshift @{$list}, @nodes;
}
# Inserts every node of another node list (anything providing get_nodelist)
# at the front of this list, keeping their relative order.
sub prepend {
    my ($list, $other) = @_;
    return CORE::unshift @{$list}, $other->get_nodelist;
}
# Returns the number of nodes held by the list.
sub size {
    my ($list) = @_;
    return scalar @{$list};
}
# Returns the node at the given 1-based position (XPath counts positions
# from 1, not 0).
#
#   $pos - position of the wanted node, starting at 1
#
# Returns undef when $pos is undefined or smaller than 1, or when the list is
# shorter than $pos. Without the explicit guard a position of 0 would become
# array index -1 and silently hand back the LAST node of the list.
sub get_node {
    my $self = CORE::shift;
    my ($pos) = @_;
    # explicit undef keeps the "exactly one value" shape in list context
    return undef if !defined $pos || $pos < 1;
    return $self->[$pos - 1];
}
# Returns the node stored at the given index. Unlike get_node(), the index is
# used as a plain (0-based) Perl array index.
sub item {
    my $list  = CORE::shift;
    my $index = CORE::shift;
    return $list->[$index];
}
# Returns the contained nodes as a plain Perl list (in scalar context this
# evaluates to the number of nodes).
sub get_nodelist {
    my ($list) = @_;
    return @{$list};
}
# Boolean value of the list: True when it holds at least one node,
# False when it is empty.
sub to_boolean {
    my ($list) = @_;
    return XML::LibXML::Boolean->True if @{$list} > 0;
    return XML::LibXML::Boolean->False;
}
# The string-value of a node list is the string-value of its first node;
# an empty list yields the empty string.
sub string_value {
    my ($list) = @_;
    return @{$list} ? $list->[0]->string_value : '';
}
# Concatenates the defined string-values of all nodes and wraps the result
# in an XML::LibXML::Literal object.
sub to_literal {
    my ($list) = @_;
    my @values = CORE::map { $_->string_value } @{$list};
    my $text   = join('', CORE::grep { defined $_ } @values);
    return XML::LibXML::Literal->new($text);
}
# Joins the defined string-values of all nodes with the given separator and
# wraps the result in an XML::LibXML::Literal object.
sub to_literal_delimited {
    my ($list, $separator) = @_;
    my @values = CORE::map { $_->string_value } @{$list};
    my $text   = join($separator, CORE::grep { defined $_ } @values);
    return XML::LibXML::Literal->new($text);
}
# Returns the literal string value of every node: as a list in list context,
# as an array reference otherwise.
sub to_literal_list {
    my ($list) = @_;
    my @strings;
    for my $node (@{$list}) {
        CORE::push @strings,
            XML::LibXML::Literal->new($node->string_value())->value();
    }
    return wantarray ? @strings : \@strings;
}
# Numeric value of the list: the literal value of the list handed to
# XML::LibXML::Number.
sub to_number {
    my ($list) = @_;
    return XML::LibXML::Number->new( $list->to_literal );
}
# Obsolete stub: only emits a warning and returns undef.
sub iterator {
    my $notice = "this function is obsolete!\nIt was disabled in version 1.54\n";
    warn $notice;
    return undef;
}
# Calls the callback (validated by __is_code) once per node with the node as
# argument and as $_, collecting everything it returns. Returns nothing in void
# context, the collected values in list context, and a new list object of the
# same class in scalar context.
sub map {
    my ($list, $code) = @_;
    my $callback = __is_code($code);
    local $_;
    my @mapped;
    for (@{$list}) {
        CORE::push @mapped, $callback->($_);
    }
    return unless defined wantarray;
    return @mapped if wantarray;
    my $class = ref $list;
    return $class->new(@mapped);
}
# Keeps the nodes for which the callback (validated by __is_code) returns a
# true value; the node is passed as argument and as $_. Returns nothing in void
# context, the kept nodes in list context, and a new list object of the same
# class in scalar context.
sub grep {
    my ($list, $code) = @_;
    my $predicate = __is_code($code);
    local $_;
    my @kept;
    for (@{$list}) {
        CORE::push @kept, $_ if $predicate->($_);
    }
    return unless defined wantarray;
    return @kept if wantarray;
    my $class = ref $list;
    return $class->new(@kept);
}
# Sorts the nodes with the given comparator (validated by __is_code). The two
# items to compare are passed to the comparator as arguments. Returns the
# sorted nodes in list context, otherwise a new list object of the same class.
sub sort {
    my ($list, $code) = @_;
    my $comparator = __is_code($code);
    my @ordered = CORE::sort { $comparator->($a, $b) } @{$list};
    return @ordered if wantarray;
    my $class = ref $list;
    return $class->new(@ordered);
}
# Runs the callback once per node, passing the node as argument and also
# making it available as $_. Returns the original nodes in list context and
# the list object itself otherwise.
sub foreach {
    my ($list, $callback) = @_;
    for my $node (@{$list}) {
        local $_ = $node;
        $callback->($node);
    }
    return @{$list} if wantarray;
    return $list;
}
# Returns the nodes in reverse order: as a plain list in list context,
# otherwise as a new list object of the same class. The list itself is not
# modified.
sub reverse {
    my ($list) = @_;
    my @flipped = CORE::reverse @{$list};
    if (wantarray) {
        return @flipped;
    }
    my $class = ref $list;
    return $class->new(@flipped);
}
# Folds the list into a single value. The callback (validated by __is_code)
# receives the accumulated value and the next item as its two arguments. An
# optional third argument is used as the initial accumulator; without it the
# first node plays that role.
sub reduce {
    my ($list, $code, @extra) = @_;
    my $reducer = __is_code($code);
    my @items = @{$list};
    CORE::unshift @items, $extra[0] if @extra;
    my $accumulator = CORE::shift(@items);
    for my $item (@items) {
        $accumulator = $reducer->($accumulator, $item);
    }
    return $accumulator;
}
# Validates that the argument can be called like a subroutine and returns it
# unchanged. Accepted are plain CODE references and objects whose class
# overloads the '&{}' (code dereference) operator. Anything else dies with
# "Not a subroutine reference\n".
sub __is_code {
    my ($candidate) = @_;

    return $candidate if ref $candidate eq 'CODE';

    # Done with core facilities only, to avoid adding external dependencies.
    my $is_callable_object =
           UNIVERSAL::can($candidate, 'can')        # is blessed (sort of)
        && overload::Overloaded($candidate)         # is overloaded
        && overload::Method($candidate, '&{}');     # overloads '&{}'
    return $candidate if $is_callable_object;

    # A coderef blessed into a class that does not overload '&{}' also ends
    # up here and is rejected.
    die "Not a subroutine reference\n";
}
1;
__END__
=head1 NAME
XML::LibXML::NodeList - a list of XML document nodes
=head1 DESCRIPTION
An XML::LibXML::NodeList object contains an ordered list of nodes, as
detailed by the W3C DOM documentation of Node Lists.
=head1 SYNOPSIS
my $results = $dom->findnodes('//somepath');
foreach my $context ($results->get_nodelist) {
my $newresults = $context->findnodes('./other/element');
...
}
=head1 API
=head2 new(@nodes)
You will almost never have to create a new NodeList object, as it is all
done for you by XPath.
=head2 get_nodelist()
Returns a list of nodes, the contents of the node list, as a perl list.
=head2 string_value()
Returns the string-value of the first node in the list.
See the XPath specification for what "string-value" means.
=head2 to_literal()
Returns the concatenation of all the string-values of all
the nodes in the list.
=head2 to_literal_delimited($separator)
Returns the concatenation of all the string-values of all
the nodes in the list, delimited by the specified separator.
=head2 to_literal_list()
Returns all the string-values of all the nodes in the list as
a perl list.
=head2 get_node($pos)
Returns the node at $pos. The node position in XPath is based at 1, not 0.
=head2 size()
Returns the number of nodes in the NodeList.
=head2 pop()
Equivalent to perl's pop function.
=head2 push(@nodes)
Equivalent to perl's push function.
=head2 append($nodelist)
Given a nodelist, appends the list of nodes in $nodelist to the end of the
current list.
=head2 shift()
Equivalent to perl's shift function.
=head2 unshift(@nodes)
Equivalent to perl's unshift function.
=head2 prepend($nodelist)
Given a nodelist, prepends the list of nodes in $nodelist to the front of
the current list.
=head2 map($coderef)
Equivalent to perl's map function.
=head2 grep($coderef)
Equivalent to perl's grep function.
=head2 sort($coderef)
Equivalent to perl's sort function.
Caveat: Perl's magic C<$a> and C<$b> variables are not available in
C<$coderef>. Instead the two terms are passed to the coderef as arguments.
=head2 reverse()
Equivalent to perl's reverse function.
=head2 foreach($coderef)
Inspired by perl's foreach loop. Executes the coderef on each item in
the list. Similar to C<map>, but instead of returning the list of values
returned by $coderef, returns the original NodeList.
=head2 reduce($coderef, $init)
Equivalent to List::Util's reduce function. C<$init> is optional and
provides an initial value for the reduction.
Caveat: Perl's magic C<$a> and C<$b> variables are not available in
C<$coderef>. Instead the two terms are passed to the coderef as arguments.
=cut

View File

@@ -0,0 +1,98 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Number;
use XML::LibXML::Boolean;
use XML::LibXML::Literal;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Operator overloading for number objects:
#   ""  and 0+ - both string and numeric conversion return the stored value
#   <=>        - numeric comparison is delegated to cmp()
use overload
'""' => \&value,
'0+' => \&value,
'<=>' => \&cmp;
# Constructor.
#
#   $number - textual number: optional minus sign, digits with an optional
#             fractional part (or a fraction starting with '.'), optionally
#             surrounded by or containing whitespace after the sign
#
# Returns a blessed scalar reference. The scalar holds the number with all
# whitespace removed, or undef (reported as 'NaN' by as_string) when the input
# is undefined or does not look like a number.
sub new {
    my $class  = shift;
    my $number = shift;
    # Check definedness first: matching undef against the pattern would only
    # produce an "uninitialized value" warning before reaching the same result.
    if (!defined $number || $number !~ /^\s*(-\s*)?(\d+(\.\d*)?|\.\d+)\s*$/) {
        $number = undef;
    }
    else {
        $number =~ s/\s+//g;
    }
    return bless \$number, $class;
}
# String form of the number; 'NaN' when no valid number is stored.
sub as_string {
    my ($number) = @_;
    return 'NaN' unless defined ${$number};
    return ${$number};
}
# XML form of the number: the value (or 'NaN' when none is stored) wrapped in
# a <Number> element followed by a newline.
sub as_xml {
    my ($number) = @_;
    my $text = defined(${$number}) ? ${$number} : 'NaN';
    return join '', "<Number>", $text, "</Number>\n";
}
# Returns the stored value (undef when no valid number is stored).
sub value {
    my ($number) = @_;
    return ${$number};
}
# Numeric three-way comparison used for the overloaded <=> operator.
# $swap is true when the operands arrive in reversed order.
sub cmp {
    my ($number, $other, $swap) = @_;
    return $swap ? ($other <=> ${$number}) : (${$number} <=> $other);
}
# Evaluating a number yields the number object itself.
sub evaluate {
    my ($number) = @_;
    return $number;
}
# Boolean value of the number: False for zero and for "not a number" (no
# value stored), True for every other number.
#
# The stored value is a numeric *string*, so it must be compared numerically:
# under Perl's string truthiness values such as "0.0", "00", "0." or "-0"
# would count as true although they are numerically zero.
sub to_boolean {
    my $self = shift;
    return (defined $$self && $$self != 0)
        ? XML::LibXML::Boolean->True
        : XML::LibXML::Boolean->False;
}
# Converts the number to an XML::LibXML::Literal built from as_string().
sub to_literal {
    my ($number) = @_;
    return XML::LibXML::Literal->new($number->as_string);
}
# A number converted to a number is the object itself.
sub to_number {
    my ($number) = @_;
    return $number;
}
# String-value of a number: whatever value() returns.
sub string_value {
    my ($number) = @_;
    return $number->value;
}
1;
__END__
=head1 NAME
XML::LibXML::Number - Simple numeric values.
=head1 DESCRIPTION
This class holds simple numeric values. It doesn't support -0, +/- Infinity,
or NaN, as the XPath spec says it should, but I'm not hurting anyone I don't think.
=head1 API
=head2 new($num)
Creates a new XML::LibXML::Number object, with the value in $num. Does some
rudimentary numeric checking on $num to ensure it actually is a number.
=head2 value()
Also as overloaded stringification. Returns the numeric value held.
=cut

View File

@@ -0,0 +1,94 @@
=head1 NAME
XML::LibXML::PI - XML::LibXML Processing Instructions
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Processing Instruction nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
=head1 DESCRIPTION
Processing instructions are implemented with XML::LibXML with read and write
access. The PI data is the PI without the PI target (as specified in XML 1.0
[17]) as a string. This string can be accessed with getData as implemented in L<<<<<< XML::LibXML::Node >>>>>>.
The write access takes into account that many processing instructions carry
attribute-like data. Therefore, besides the interface conforming to the DOM
specification, setData() also accepts a set of named parameters. So the code segment
my $pi = $dom->createProcessingInstruction("abc");
$pi->setData(foo=>'bar', foobar=>'foobar');
$dom->appendChild( $pi );
will result in the following PI in the DOM:
<?abc foo="bar" foobar="foobar"?>
Which is how it is specified in the DOM specification. This three step
interface temporarily creates a node in perl space. This can be avoided by
using the insertProcessingInstruction() method. Instead of the three calls
described above, the call
$dom->insertProcessingInstruction("abc",'foo="bar" foobar="foobar"');
will have the same result as above.
L<<<<<< XML::LibXML::PI >>>>>>'s implementation of setData() documented below differs a bit from the standard
version as available in L<<<<<< XML::LibXML::Node >>>>>>:
=over 4
=item setData
$pinode->setData( $data_string );
$pinode->setData( name=>string_value [...] );
This method allows one to change the content data of a PI. Additionally to the
interface specified for DOM Level2, the method provides a named parameter
interface to set the data. This parameter list is converted into a string
before it is appended to the PI.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,114 @@
=head1 NAME
XML::LibXML::Pattern - interface to libxml2 XPath patterns
=head1 SYNOPSIS
use XML::LibXML;
my $pattern = XML::LibXML::Pattern->new('/x:html/x:body//x:div', { 'x' => 'http://www.w3.org/1999/xhtml' });
# test a match on an XML::LibXML::Node $node
if ($pattern->matchesNode($node)) { ... }
# or on an XML::LibXML::Reader
if ($reader->matchesPattern($pattern)) { ... }
# or skip reading all nodes that do not match
print $reader->nodePath while $reader->nextPatternMatch($pattern);
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
$bool = $pattern->matchesNode($node);
=head1 DESCRIPTION
This is a perl interface to libxml2's pattern matching support I<<<<<< http://xmlsoft.org/html/libxml-pattern.html >>>>>>. This feature requires recent versions of libxml2.
Patterns are a small subset of XPath language, which is limited to
(disjunctions of) location paths involving the child and descendant axes in
abbreviated form as described by the extended BNF given below:
Selector ::= Path ( '|' Path )*
Path ::= ('.//' | '//' | '/' )? Step ( '/' Step )*
Step ::= '.' | NameTest
NameTest ::= QName | '*' | NCName ':' '*'
For readability, whitespace may be used in selector XPath expressions even
though not explicitly allowed by the grammar: whitespace may be freely added
within patterns before or after any token, where
token ::= '.' | '/' | '//' | '|' | NameTest
Note that no predicates or attribute tests are allowed.
Patterns are particularly useful for stream parsing provided via the C<<<<<< XML::LibXML::Reader >>>>>> interface.
=over 4
=item new()
$pattern = XML::LibXML::Pattern->new( pattern, { prefix => namespace_URI, ... } );
The constructor of a pattern takes a pattern expression (as described by the
BNF grammar above) and an optional HASH reference mapping prefixes to namespace
URIs. The method returns a compiled pattern object.
Note that if the document has a default namespace, it must still be given a
prefix in order to be matched (as demanded by the XPath 1.0 specification). For
example, to match an element C<<<<<< E<lt>a xmlns="http://foo.bar"E<gt>E<lt>/aE<gt> >>>>>>, one should use a pattern like this:
$pattern = XML::LibXML::Pattern->new( 'foo:a', { foo => 'http://foo.bar' });
=item matchesNode($node)
$bool = $pattern->matchesNode($node);
Given an XML::LibXML::Node object, returns a true value if the node is matched
by the compiled pattern expression.
=back
=head1 SEE ALSO
L<<<<<< XML::LibXML::Reader >>>>>> for other methods involving compiled patterns.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,216 @@
# $Id: Reader.pm,v 1.1.2.1 2004/04/20 20:09:48 pajas Exp $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::Reader;
use XML::LibXML;
use Carp;
use strict;
use warnings;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use 5.008_000;
# Compile-time guard: refuse to load when the reader class has no
# _newForFile method available.
BEGIN {
    if (!UNIVERSAL::can('XML::LibXML::Reader', '_newForFile')) {
        croak("Cannot use XML::LibXML::Reader module - ".
              "your libxml2 is compiled without reader support!");
    }
}
use base qw(Exporter);
# Numeric constants offered by this module. They are exported further below
# in two groups: the XML_READER_TYPE_* names under the "types" tag and the
# remaining XML_READER_* names under the "states" tag.
use constant {
# node type codes ("types" export tag)
XML_READER_TYPE_NONE => 0,
XML_READER_TYPE_ELEMENT => 1,
XML_READER_TYPE_ATTRIBUTE => 2,
XML_READER_TYPE_TEXT => 3,
XML_READER_TYPE_CDATA => 4,
XML_READER_TYPE_ENTITY_REFERENCE => 5,
XML_READER_TYPE_ENTITY => 6,
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7,
XML_READER_TYPE_COMMENT => 8,
XML_READER_TYPE_DOCUMENT => 9,
XML_READER_TYPE_DOCUMENT_TYPE => 10,
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11,
XML_READER_TYPE_NOTATION => 12,
XML_READER_TYPE_WHITESPACE => 13,
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14,
XML_READER_TYPE_END_ELEMENT => 15,
XML_READER_TYPE_END_ENTITY => 16,
XML_READER_TYPE_XML_DECLARATION => 17,
# reader state codes ("states" export tag)
XML_READER_NONE => -1,
XML_READER_START => 0,
XML_READER_ELEMENT => 1,
XML_READER_END => 2,
XML_READER_EMPTY => 3,
XML_READER_BACKTRACK => 4,
XML_READER_DONE => 5,
XML_READER_ERROR => 6
};
use vars qw( @EXPORT @EXPORT_OK %EXPORT_TAGS );
# Thread-cloning hook: unconditionally returns a true value. Per perlmod,
# a true CLONE_SKIP tells ithreads not to clone objects of this class.
sub CLONE_SKIP {
    return 1;
}
# Populate the Exporter tables at compile time:
#   :types  - the XML_READER_TYPE_* node type constants
#   :states - the XML_READER_* reader state constants
#   :all    - everything in @EXPORT_OK
# Both groups are also placed in @EXPORT, i.e. exported by default.
BEGIN {
%EXPORT_TAGS = (
types =>
[qw(
XML_READER_TYPE_NONE
XML_READER_TYPE_ELEMENT
XML_READER_TYPE_ATTRIBUTE
XML_READER_TYPE_TEXT
XML_READER_TYPE_CDATA
XML_READER_TYPE_ENTITY_REFERENCE
XML_READER_TYPE_ENTITY
XML_READER_TYPE_PROCESSING_INSTRUCTION
XML_READER_TYPE_COMMENT
XML_READER_TYPE_DOCUMENT
XML_READER_TYPE_DOCUMENT_TYPE
XML_READER_TYPE_DOCUMENT_FRAGMENT
XML_READER_TYPE_NOTATION
XML_READER_TYPE_WHITESPACE
XML_READER_TYPE_SIGNIFICANT_WHITESPACE
XML_READER_TYPE_END_ELEMENT
XML_READER_TYPE_END_ENTITY
XML_READER_TYPE_XML_DECLARATION
)],
states =>
[qw(
XML_READER_NONE
XML_READER_START
XML_READER_ELEMENT
XML_READER_END
XML_READER_EMPTY
XML_READER_BACKTRACK
XML_READER_DONE
XML_READER_ERROR
)]
);
# default export list = node types followed by reader states
@EXPORT = (@{$EXPORT_TAGS{types}},@{$EXPORT_TAGS{states}});
@EXPORT_OK = @EXPORT;
# :all shares the @EXPORT_OK array itself (stored by reference, not copied)
$EXPORT_TAGS{all}=\@EXPORT_OK;
}
our %_preserve_flag;
{
# Maps the public property names accepted by getParserProp()/setParserProp()
# to the integer codes handed to _getParserProp()/_setParserProp().
# NOTE(review): the codes presumably mirror libxml2's xmlParserProperties
# enumeration - confirm against the XS layer.
my %props = (
load_ext_dtd => 1, # load the external subset
complete_attributes => 2, # default DTD attributes
validation => 3, # validate with the DTD
expand_entities => 4, # substitute entities
);
# Read the current value of a named parser property.
#   $prop_name - one of the keys of %props (e.g. 'validation')
# Returns whatever _getParserProp() yields for the mapped code, or undef
# when $prop_name is not a known property name.
sub getParserProp {
    my ( $reader, $prop_name ) = @_;
    my $code = $props{$prop_name};
    if ( defined $code ) {
        return $reader->_getParserProp($code);
    }
    return undef;
}
# Set one or more named parser properties.
#   @_ - a flat list of name => value pairs and/or hash references holding
#        such pairs; names must be keys of %props.
# Unknown property names are reported with carp() and skipped instead of
# handing an undefined property code to _setParserProp() (this mirrors the
# guard in getParserProp()).
# Returns nothing.
sub setParserProp {
    my $self = shift;

    # flatten hash references into the key/value list
    my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;

    for my $name ( keys %args ) {
        my $prop = $props{$name};
        unless ( defined $prop ) {
            carp("XML::LibXML::Reader: unknown parser property '$name' ignored");
            next;
        }
        $self->_setParserProp( $prop, $args{$name} );
    }
    return;
}
# Per-reader side tables, keyed by the stringified reader object. new() stores
# references to the source string and to RelaxNG/Schema objects here, and
# DESTROY() deletes the entries again.
my (%string_pool,%rng_pool,%xsd_pool); # used to preserve data passed to the reader
# Constructor. Accepts a flat key/value list and/or hash references, which
# are merged into a single option set.
# Exactly one input source is used, checked in this order:
#   location, string, IO, DOM, FD   (croaks when none of them is defined)
# Optional RelaxNG / Schema entries attach a validator: a reference is passed
# to _setRelaxNG() / _setXSD(), a plain scalar to _setRelaxNGFile() /
# _setXSDFile().
# Returns the reader object produced by the selected _newFor* constructor.
sub new {
    my $class = shift;
    my %args  = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;

    my ( $encoding, $URI ) = @args{qw(encoding URI)};
    if ( defined $URI ) {
        $URI = "$URI";    # stringify in case it is an URI object
    }
    my $options = XML::LibXML->_parser_options( \%args );

    my $self;
    if ( defined $args{location} ) {
        $self = $class->_newForFile( $args{location}, $encoding, $options );
    }
    elsif ( defined $args{string} ) {
        $self = $class->_newForString( $args{string}, $URI, $encoding, $options );

        # remember the source string for as long as the reader is registered
        $string_pool{$self} = \$args{string} if defined $self;
    }
    elsif ( defined $args{IO} ) {
        $self = $class->_newForIO( $args{IO}, $URI, $encoding, $options );
    }
    elsif ( defined $args{DOM} ) {
        if ( !UNIVERSAL::isa( $args{DOM}, 'XML::LibXML::Document' ) ) {
            croak("DOM must be a XML::LibXML::Document node");
        }
        $self = $class->_newForDOM( $args{DOM} );
    }
    elsif ( defined $args{FD} ) {
        # the FD argument is a filehandle; the XS layer gets its descriptor
        $self = $class->_newForFd( fileno( $args{FD} ), $URI, $encoding, $options );
    }
    else {
        croak("XML::LibXML::Reader->new: specify location, string, IO, DOM, or FD");
    }

    if ( my $rng = $args{RelaxNG} ) {
        if ( !ref $rng ) {
            $self->_setRelaxNGFile($rng);
        }
        else {
            $rng_pool{$self} = \$args{RelaxNG};
            $self->_setRelaxNG($rng);
        }
    }

    if ( my $xsd = $args{Schema} ) {
        if ( !ref $xsd ) {
            $self->_setXSDFile($xsd);
        }
        else {
            $xsd_pool{$self} = \$args{Schema};
            $self->_setXSD($xsd);
        }
    }

    return $self;
}
# Destructor: drop this reader's entries from the three side tables, then
# hand over to _DESTROY().
sub DESTROY {
    my ($reader) = @_;
    for my $pool ( \%string_pool, \%rng_pool, \%xsd_pool ) {
        delete $pool->{$reader};
    }
    $reader->_DESTROY;
}
}
# Close the reader.
# _close() reports 0 on success and -1 on failure; this wrapper translates
# that into the convention of Perl's built-in close(): 1 on success,
# 0 on failure.
sub close {
    my $self   = shift;
    my $status = $self->_close;
    return 1 if $status == 0;
    return 0;
}
# Register a pattern whose matches are passed to _preservePattern().
#   $xpath         - the pattern string
#   $prefix_to_uri - optional; when it is a plain HASH reference of
#                    prefix => URI, it is flattened and reversed into a
#                    (URI, prefix, ...) array reference; any other value is
#                    forwarded untouched
# Returns whatever _preservePattern() returns.
sub preservePattern {
    my $self = shift;
    my ( $xpath, $prefix_to_uri ) = @_;

    return $self->_preservePattern(@_)
        unless ref($prefix_to_uri) eq 'HASH';

    # translate the prefix=>URL hash to a (URL,prefix) list
    my @uri_prefix_pairs = reverse %{$prefix_to_uri};
    return $self->_preservePattern( $xpath, \@uri_prefix_pairs );
}
# Return the path reported by _nodePath() with every numeric predicate
# removed, so that /foo[1]/bar[1] becomes /foo/bar. The sibling counts in the
# buffered fragment are basically random and generally misleading.
sub nodePath {
    my ($self) = @_;
    ( my $location = $self->_nodePath ) =~ s/\[\d+\]//g;
    return $location;
}
1;
__END__

View File

@@ -0,0 +1,677 @@
=head1 NAME
XML::LibXML::Reader - interface to libxml2 pull parser
=head1 SYNOPSIS
use XML::LibXML::Reader;
my $reader = XML::LibXML::Reader->new(location => "file.xml")
or die "cannot read file.xml\n";
while ($reader->read) {
processNode($reader);
}
sub processNode {
my $reader = shift;
printf "%d %d %s %d\n", ($reader->depth,
$reader->nodeType,
$reader->name,
$reader->isEmptyElement);
}
or
my $reader = XML::LibXML::Reader->new(location => "file.xml")
or die "cannot read file.xml\n";
$reader->preservePattern('//table/tr');
$reader->finish;
print $reader->document->toString(1);
=head1 DESCRIPTION
This is a perl interface to libxml2's pull-parser implementation xmlTextReader I<<<<<< http://xmlsoft.org/html/libxml-xmlreader.html >>>>>>. This feature requires at least libxml2-2.6.21. Pull-parsers (such as StAX in
Java, or XmlReader in C#) use an iterator approach to parse XML documents. They
are easier to program than event-based parsers (SAX) and much more lightweight
than tree-based parsers (DOM), which load the complete tree into memory.
The Reader acts as a cursor going forward on the document stream and stopping
at each node on the way. At every point, the DOM-like methods of the Reader
object allow one to examine the current node (name, namespace, attributes,
etc.)
The user's code keeps control of the progress and simply calls the C<<<<<< read() >>>>>> function repeatedly to progress to the next node in the document order. Other
functions provide means for skipping complete sub-trees, or nodes until a
specific element, etc.
At every time, only a very limited portion of the document is kept in the
memory, which makes the API more memory-efficient than using DOM. However, it
is also possible to mix Reader with DOM. At every point the user may copy the
current node (optionally expanded into a complete sub-tree) from the processed
document to another DOM tree, or to instruct the Reader to collect sub-document
in form of a DOM tree consisting of selected nodes.
Reader API also supports namespaces, xml:base, entity handling, and DTD
validation. RelaxNG and W3C XML Schema validation are available through the
C<<<<<< RelaxNG >>>>>> and C<<<<<< Schema >>>>>> constructor options described below.
The naming of methods compared to libxml2 and C# XmlTextReader has been changed
slightly to match the conventions of XML::LibXML. Some functions have been
changed or added with respect to the C interface.
=head1 CONSTRUCTOR
Depending on the XML source, the Reader object can be created with either of:
my $reader = XML::LibXML::Reader->new( location => "file.xml", ... );
my $reader = XML::LibXML::Reader->new( string => $xml_string, ... );
my $reader = XML::LibXML::Reader->new( IO => $file_handle, ... );
my $reader = XML::LibXML::Reader->new( FD => \*STDIN, ... );
my $reader = XML::LibXML::Reader->new( DOM => $dom, ... );
where ... are (optional) reader options described below in L<<<<<< Reader options >>>>>> or various parser options described in L<<<<<< XML::LibXML::Parser >>>>>>. The constructor recognizes the following XML sources:
=head2 Source specification
=over 4
=item location
Read XML from a local file or (non-HTTPS) URL.
=item string
Read XML from a string.
=item IO
Read XML from a Perl IO filehandle.
=item FD
Read XML from the file descriptor of an open Perl filehandle (the constructor
calls fileno() on the given handle; this bypasses the Perl I/O layer and is only
applicable to filehandles for regular files or pipes). Possibly faster than IO.
=item DOM
Use reader API to walk through a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
=back
=head2 Reader options
=over 4
=item encoding => $encoding
override document encoding.
=item RelaxNG => $rng_schema
can be used to pass either a L<<<<<< XML::LibXML::RelaxNG >>>>>> object or a filename or (non-HTTPS) URL of a RelaxNG schema to the constructor.
The schema is then used to validate the document as it is processed.
=item Schema => $xsd_schema
can be used to pass either a L<<<<<< XML::LibXML::Schema >>>>>> object or a filename or (non-HTTPS) URL of a W3C XSD schema to the constructor.
The schema is then used to validate the document as it is processed.
=item ...
the reader further supports various parser options described in L<<<<<< XML::LibXML::Parser >>>>>> (specifically those labeled by /reader/).
=back
=head1 METHODS CONTROLLING PARSING PROGRESS
=over 4
=item read ()
Moves the position to the next node in the stream, exposing its properties.
Returns 1 if the node was read successfully, 0 if there are no more nodes to
read, or -1 in case of error.
=item readAttributeValue ()
Parses an attribute value into one or more Text and EntityReference nodes.
Returns 1 in case of success, 0 if the reader was not positioned on an
attribute node or all the attribute values have been read, or -1 in case of
error.
=item readState ()
Gets the read state of the reader. Returns the state value, or -1 in case of
error. The module exports constants for the Reader states, see STATES below.
=item depth ()
The depth of the node in the tree, starts at 0 for the root node.
=item next ()
Skip to the node following the current one in the document order while avoiding
the sub-tree if any. Returns 1 if the node was read successfully, 0 if there
are no more nodes to read, or -1 in case of error.
=item nextElement (localname?,nsURI?)
Skip nodes following the current one in the document order until a specific
element is reached. The element's name must be equal to a given localname if
defined, and its namespace must be equal to a given nsURI if defined. Either of
the arguments can be undefined (or omitted, in case of the latter or both).
Returns 1 if the element was found, 0 if there are no more nodes to read, or -1
in case of error.
=item nextPatternMatch (compiled_pattern)
Skip nodes following the current one in the document order until an element
matching a given compiled pattern is reached. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< matchesPattern >>>>>> method.
Returns 1 if the element was found, 0 if there are no more nodes to read, or -1
in case of error.
=item skipSiblings ()
Skip all nodes on the same or lower level until the first node on a higher
level is reached. In particular, if the current node occurs in an element, the
reader stops at the end tag of the parent element, otherwise it stops at a node
immediately following the parent node.
Returns 1 if successful, 0 if end of the document is reached, or -1 in case of
error.
=item nextSibling ()
It skips to the node following the current one in the document order while
avoiding the sub-tree if any.
Returns 1 if the node was read successfully, 0 if there are no more nodes to
read, or -1 in case of error.
=item nextSiblingElement (name?,nsURI?)
Like nextElement but only processes sibling elements of the current node
(moving forward using C<<<<<< nextSibling () >>>>>> rather than C<<<<<< read () >>>>>>, internally).
Returns 1 if the element was found, 0 if there is no more sibling nodes, or -1
in case of error.
=item finish ()
Skip all remaining nodes in the document, reaching end of the document.
Returns 1 if successful, 0 in case of error.
=item close ()
This method releases any resources allocated by the current instance and closes
any underlying input. It returns 0 on failure and 1 on success. This method is
automatically called by the destructor when the reader is forgotten, therefore
you do not have to call it directly.
=back
=head1 METHODS EXTRACTING INFORMATION
=over 4
=item name ()
Returns the qualified name of the current node, equal to (Prefix:)LocalName.
=item nodeType ()
Returns the type of the current node. See NODE TYPES below.
=item localName ()
Returns the local name of the node.
=item prefix ()
Returns the prefix of the namespace associated with the node.
=item namespaceURI ()
Returns the URI defining the namespace associated with the node.
=item isEmptyElement ()
Check if the current node is empty, this is a bit bizarre in the sense that
<a/> will be considered empty while <a></a> will not.
=item hasValue ()
Returns true if the node can have a text value.
=item value ()
Provides the text value of the node if present or undef if not available.
=item readInnerXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node's content, or undef if the
current node is neither an element nor attribute, or has no child nodes.
=item readOuterXml ()
Reads the contents of the current node, including child nodes and markup.
Returns a string containing the XML of the node including its content, or undef
if the current node is neither an element nor attribute.
=item nodePath()
Returns a canonical location path to the current element from the root node to
the current node. Namespaced elements are matched by '*', because there is no
way to declare prefixes within XPath patterns. Unlike C<<<<<< XML::LibXML::Node::nodePath() >>>>>>, this function does not provide sibling counts (i.e. instead of e.g. '/a/b[1]'
and '/a/b[2]' you get '/a/b' for both matches).
=item matchesPattern(compiled_pattern)
Returns a true value if the current node matches a compiled pattern. See L<<<<<< XML::LibXML::Pattern >>>>>> for information on compiled patterns. See also the C<<<<<< nextPatternMatch >>>>>> method.
=back
=head1 METHODS EXTRACTING DOM NODES
=over 4
=item document ()
Provides access to the document tree built by the reader. This function can be
used to collect the preserved nodes (see C<<<<<< preserveNode() >>>>>> and preservePattern).
CAUTION: Never use this function to modify the tree unless reading of the whole
document is completed!
=item copyCurrentNode (deep)
This function is similar to the DOM function C<<<<<< copyNode() >>>>>>. It returns a copy of the currently processed node as a corresponding DOM
object. Use deep = 1 to obtain the full sub-tree.
=item preserveNode ()
This tells the XML Reader to preserve the current node in the document tree. A
document tree consisting of the preserved nodes and their content can be
obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
Returns the node or NULL in case of error.
=item preservePattern (pattern,\%ns_map)
This tells the XML Reader to preserve all nodes matched by the pattern (which
is a streaming XPath subset). A document tree consisting of the preserved nodes
and their content can be obtained using the method C<<<<<< document() >>>>>> once parsing is finished.
An optional second argument can be used to provide a HASH reference mapping
prefixes used by the XPath to namespace URIs.
The XPath subset available with this function is described at
http://www.w3.org/TR/xmlschema-1/#Selector
and matches the production
Path ::= ('.//')? ( Step '/' )* ( Step | '@' NameTest )
Returns a positive number in case of success and -1 in case of error
=back
=head1 METHODS PROCESSING ATTRIBUTES
=over 4
=item attributeCount ()
Provides the number of attributes of the current node.
=item hasAttributes ()
Whether the node has attributes.
=item getAttribute (name)
Provides the value of the attribute with the specified qualified name.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNs (localName, namespaceURI)
Provides the value of the specified attribute.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item getAttributeNo (no)
Provides the value of the attribute with the specified index relative to the
containing element.
Returns a string containing the value of the specified attribute, or undef in
case of error.
=item isDefault ()
Returns true if the current attribute node was generated from the default value
defined in the DTD.
=item moveToAttribute (name)
Moves the position to the attribute with the specified qualified name.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNo (no)
Moves the position to the attribute with the specified index relative to the
containing element.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToAttributeNs (localName,namespaceURI)
Moves the position to the attribute with the specified local name and namespace
URI.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToFirstAttribute ()
Moves the position to the first attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToNextAttribute ()
Moves the position to the next attribute associated with the current node.
Returns 1 in case of success, -1 in case of error, 0 if not found
=item moveToElement ()
Moves the position to the node that contains the current attribute node.
Returns 1 in case of success, -1 in case of error, 0 if not moved
=item isNamespaceDecl ()
Determine whether the current node is a namespace declaration rather than a
regular attribute.
Returns 1 if the current node is a namespace declaration, 0 if it is a regular
attribute or other type of node, or -1 in case of error.
=back
=head1 OTHER METHODS
=over 4
=item lookupNamespace (prefix)
Resolves a namespace prefix in the scope of the current element.
Returns a string containing the namespace URI to which the prefix maps or undef
in case of error.
=item encoding ()
Returns a string containing the encoding of the document or undef in case of
error.
=item standalone ()
Determine the standalone status of the document being read. Returns 1 if the
document was declared to be standalone, 0 if it was declared to be not
standalone, or -1 if the document did not specify its standalone status or in
case of error.
=item xmlVersion ()
Determine the XML version of the document being read. Returns a string
containing the XML version of the document or undef in case of error.
=item baseURI ()
Returns the base URI of a given node.
=item isValid ()
Retrieve the validity status from the parser.
Returns 1 if valid, 0 if no, and -1 in case of error.
=item xmlLang ()
The xml:lang scope within which the node resides.
=item lineNumber ()
Provide the line number of the current parsing point.
=item columnNumber ()
Provide the column number of the current parsing point.
=item byteConsumed ()
This function provides the current index of the parser relative to the start of
the current entity. This function is computed in bytes from the beginning
starting at zero and finishing at the size in bytes of the file if parsing a
file. The function is of constant cost if the input is UTF-8 but can be costly
if run on non-UTF-8 input.
=item setParserProp (prop => value, ...)
Change the parser processing behaviour by changing some of its internal
properties. The following properties are available with this function:
``load_ext_dtd'', ``complete_attributes'', ``validation'', ``expand_entities''.
Since some of the properties can only be changed before any read has been done,
it is best to set the parsing properties at the constructor.
This method does not return a meaningful value.
=item getParserProp (prop)
Get the value of an internal parser property. The following property names can
be used: ``load_ext_dtd'', ``complete_attributes'', ``validation'',
``expand_entities''.
Returns the value, usually 0 or 1, or -1 in case of error. Returns undef if the
property name is not one of those listed above.
=back
=head1 DESTRUCTION
XML::LibXML takes care of the reader object destruction when the last reference
to the reader object goes out of scope. The document tree is preserved, though,
if either of $reader->document or $reader->preserveNode was used and references
to the document tree exist.
=head1 NODE TYPES
The reader interface provides the following constants for node types (the
constant symbols are exported by default or if tag C<<<<<< :types >>>>>> is used).
XML_READER_TYPE_NONE => 0
XML_READER_TYPE_ELEMENT => 1
XML_READER_TYPE_ATTRIBUTE => 2
XML_READER_TYPE_TEXT => 3
XML_READER_TYPE_CDATA => 4
XML_READER_TYPE_ENTITY_REFERENCE => 5
XML_READER_TYPE_ENTITY => 6
XML_READER_TYPE_PROCESSING_INSTRUCTION => 7
XML_READER_TYPE_COMMENT => 8
XML_READER_TYPE_DOCUMENT => 9
XML_READER_TYPE_DOCUMENT_TYPE => 10
XML_READER_TYPE_DOCUMENT_FRAGMENT => 11
XML_READER_TYPE_NOTATION => 12
XML_READER_TYPE_WHITESPACE => 13
XML_READER_TYPE_SIGNIFICANT_WHITESPACE => 14
XML_READER_TYPE_END_ELEMENT => 15
XML_READER_TYPE_END_ENTITY => 16
XML_READER_TYPE_XML_DECLARATION => 17
=head1 STATES
The following constants represent the values returned by C<<<<<< readState() >>>>>>. They are exported by default, or if tag C<<<<<< :states >>>>>> is used:
XML_READER_NONE => -1
XML_READER_START => 0
XML_READER_ELEMENT => 1
XML_READER_END => 2
XML_READER_EMPTY => 3
XML_READER_BACKTRACK => 4
XML_READER_DONE => 5
XML_READER_ERROR => 6
=head1 SEE ALSO
L<<<<<< XML::LibXML::Pattern >>>>>> for information about compiled patterns.
http://xmlsoft.org/html/libxml-xmlreader.html
http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html
=head1 ORIGINAL IMPLEMENTATION
Heiko Klein, E<lt>H.Klein@gmx.netE<gt> and Petr Pajas
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,78 @@
=head1 NAME
XML::LibXML::RegExp - interface to libxml2 regular expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_re = XML::LibXML::RegExp->new('[0-9]{5}(-[0-9]{4})?');
if ($compiled_re->isDeterministic()) { ... }
if ($compiled_re->matches($string)) { ... }
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
$bool = $compiled_re->matches($string);
$bool = $compiled_re->isDeterministic();
=head1 DESCRIPTION
This is a perl interface to libxml2's implementation of regular expressions,
which are used e.g. for validation of XML Schema simple types (pattern facet).
=over 4
=item new()
$compiled_re = XML::LibXML::RegExp->new( $regexp_str );
The constructor takes a string containing a regular expression and returns a
compiled regexp object.
=item matches($string)
$bool = $compiled_re->matches($string);
Given a string value, returns a true value if the value is matched by the
compiled regular expression.
=item isDeterministic()
$bool = $compiled_re->isDeterministic();
Returns a true value if the regular expression is deterministic; returns false
otherwise. (See the definition of determinism in the XML spec (L<<<<<< http://www.w3.org/TR/REC-xml/#determinism >>>>>>))
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,93 @@
=head1 NAME
XML::LibXML::RelaxNG - RelaxNG Schema Validation
=head1 SYNOPSIS
use XML::LibXML;
$doc = XML::LibXML->new->parse_file($url);
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 );
eval { $rngschema->validate( $doc ); };
=head1 DESCRIPTION
The XML::LibXML::RelaxNG class is a tiny frontend to libxml2's RelaxNG
implementation. Currently it supports only schema parsing and document
validation.
=head1 METHODS
=over 4
=item new
$rngschema = XML::LibXML::RelaxNG->new( location => $filename_or_url, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( string => $xmlschemastring, no_network => 1 );
$rngschema = XML::LibXML::RelaxNG->new( DOM => $doc, no_network => 1 );
The constructor of XML::LibXML::RelaxNG needs to be called with a list of
parameters. At least one of the location, string or DOM parameters is required
to specify the source of the schema. Setting the optional parameter no_network
to 1 prevents the parser from accessing the network, and setting the optional
parameter recover to 1 prevents the parser from calling die() on errors.
It is important that each schema has only a single source.
The location parameter allows one to parse a schema from the filesystem or a
(non-HTTPS) URL.
The string parameter will parse the schema from the given XML string.
The DOM parameter allows one to parse the schema from a pre-parsed L<<<<<< XML::LibXML::Document >>>>>>.
Note that the constructor will die() if the schema does not meet the
constraints of the RelaxNG specification.
=item validate
eval { $rngschema->validate( $doc ); };
This function allows one to validate a (parsed) document against the given
RelaxNG schema. The argument of this function should be an
XML::LibXML::Document object. If this function succeeds, it will return 0,
otherwise it will die() and report the errors found. Because of this, validate()
should always be called inside an eval block.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,122 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX;
use strict;
use warnings;
use vars qw($VERSION @ISA);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
use XML::LibXML;
use XML::SAX::Base;
use parent qw(XML::SAX::Base);
use Carp;
use IO::File;
# Thread-cloning hook: returns 0 when $XML::LibXML::__threads_shared is true,
# 1 otherwise.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
# Set a SAX feature.
# The join-character-data feature is handled locally: its value is stored in
# $self->{JOIN_CHARACTERS} and 1 is returned. Every other feature is passed
# to the parent class' set_feature() with the same arguments.
sub set_feature {
    my $self = shift;
    my ( $feature, $value ) = @_;

    if ( $feature ne 'http://xmlns.perl.org/sax/join-character-data' ) {
        return $self->SUPER::set_feature(@_);
    }

    $self->{JOIN_CHARACTERS} = $value;
    return 1;
}
# Character-stream input is not supported: this method always croaks.
# (Author's note carried over: an implementation may have to catch the XML
# declaration so the parser is not confused by a possibly wrong encoding.)
sub _parse_characterstream {
    my ( $self, $stream ) = @_;
    croak("not implemented yet");
}
# Build the parser that the _parse_* methods and parse_chunk() install when
# the caller did not supply one in {ParserOptions}{LibParser}.
# Entity expansion is requested explicitly; see:
# https://rt.cpan.org/Public/Bug/Display.html?id=132759
sub _calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc {
    my %compat_options = ( expand_entities => 1 );
    return XML::LibXML->new(%compat_options);
}
# Parse from a byte-stream filehandle.
# Installs a default parser if none was supplied, selects
# XML::LibXML::parse_fh as the parse function with $handle as its argument,
# runs _parse() and returns the result of end_document({}).
sub _parse_bytestream {
    my ( $self, $handle ) = @_;

    if ( !defined $self->{ParserOptions}{LibParser} ) {
        $self->{ParserOptions}{LibParser} =
            $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }
    @{ $self->{ParserOptions} }{qw(ParseFunc ParseFuncParam)} =
        ( \&XML::LibXML::parse_fh, $handle );

    $self->_parse;
    return $self->end_document( {} );
}
# Parse from an in-memory string.
# Installs a default parser if none was supplied, selects
# XML::LibXML::parse_string as the parse function with $xml as its argument,
# runs _parse() and returns the result of end_document({}).
sub _parse_string {
    my ( $self, $xml ) = @_;

    if ( !defined $self->{ParserOptions}{LibParser} ) {
        my $parser = $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
        $self->{ParserOptions}{LibParser} = $parser;
    }
    $self->{ParserOptions}{ParseFuncParam} = $xml;
    $self->{ParserOptions}{ParseFunc}      = \&XML::LibXML::parse_string;

    $self->_parse;
    return $self->end_document( {} );
}
# Parse the resource named by a system identifier.
# Installs a default parser if none was supplied, selects
# XML::LibXML::parse_file as the parse function with $system_id as its
# argument, runs _parse() and returns the result of end_document({}).
sub _parse_systemid {
    my ( $self, $system_id ) = @_;

    my $needs_parser = !defined $self->{ParserOptions}{LibParser};
    if ($needs_parser) {
        $self->{ParserOptions}{LibParser} =
            $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }
    $self->{ParserOptions}{ParseFunc}      = \&XML::LibXML::parse_file;
    $self->{ParserOptions}{ParseFuncParam} = $system_id;

    $self->_parse;
    return $self->end_document( {} );
}
# Generate SAX events for a chunk of XML.
# Installs a default parser if none was supplied, selects
# XML::LibXML::parse_xml_chunk as the parse function with $chunk as its
# argument and runs _parse(). Unlike the _parse_* methods it does not call
# end_document() itself.
# Returns nothing.
sub parse_chunk {
    my ( $self, $chunk ) = @_;

    if ( !defined $self->{ParserOptions}{LibParser} ) {
        $self->{ParserOptions}{LibParser} =
            $self->_calc_new_XML_LibXML_parser_for_compatibility_with_XML_Simple_etc();
    }

    # a hack to prevent parse_xml_chunk from issuing end_document
    $self->{ParserOptions}{LibParser}->{IS_FILTER} = 1;

    @{ $self->{ParserOptions} }{qw(ParseFunc ParseFuncParam)} =
        ( \&XML::LibXML::parse_xml_chunk, $chunk );

    $self->_parse;
    return;
}
# Run the parse function selected by one of the _parse_* methods.
#
# Reads LibParser, ParseFunc and ParseFuncParam from $self->{ParserOptions}
# (which is blessed into the class of $self, as before), registers $self as
# the parser's handler, calls the parse function inside an eval and then
# unregisters the handler again.
#
# Croaks with the "Data ended before document ended" message when the
# parser's {SAX}{State} is 1 after parsing; otherwise croaks with the error
# raised by the parse function, if any. Returns nothing on success.
#
# Differences from the previous version:
#   * $@ is copied immediately after the eval, before any other call has a
#     chance to overwrite it;
#   * the handler is unregistered before either croak, so the possible
#     circular reference is broken on the "data ended" path too;
#   * an undefined State no longer triggers an uninitialized-value warning.
sub _parse {
    my $self   = shift;
    my $args   = bless $self->{ParserOptions}, ref($self);
    my $parser = $args->{LibParser};

    $parser->{JOIN_CHARACTERS} =
        defined( $self->{JOIN_CHARACTERS} ) ? $self->{JOIN_CHARACTERS} : 0;

    $parser->set_handler($self);

    eval {
        $args->{ParseFunc}->( $parser, $args->{ParseFuncParam} );
    };
    my $error = $@;    # capture now; later calls may clobber $@

    # read the state before the handler is reset
    my $state     = $parser->{SAX}->{State};
    my $truncated = ( defined $state && $state == 1 );

    # break a possible circular reference
    $parser->set_handler(undef);

    if ($truncated) {
        croak( "SAX Exception not implemented, yet; Data ended before document ended\n" );
    }
    if ($error) {
        croak $error;
    }
    return;
}
1;

View File

@@ -0,0 +1,67 @@
=head1 NAME
XML::LibXML::SAX - XML::LibXML direct SAX parser
=head1 DESCRIPTION
XML::LibXML provides an interface to libxml2 direct SAX interface. Through this
interface it is possible to generate SAX events directly while parsing a
document. While using the SAX parser XML::LibXML will not create a DOM Document
tree.
Such an interface is useful if very large XML documents have to be processed
and no DOM functions are required. By using this interface it is possible to
read data stored within an XML document directly into the application data
structures without loading the document into memory.
The SAX interface of XML::LibXML is based on the famous XML::SAX interface. It
uses the generic interface as provided by XML::SAX::Base.
Additionally to the generic functions, which are only able to process entire
documents, XML::LibXML::SAX provides I<<<<<< parse_chunk() >>>>>>. This method generates SAX events from well balanced data such as is often
provided by databases.
=head1 FEATURES
I<<<<<< NOTE: >>>>>> This feature is experimental.
You can enable character data joining which may yield a significant speed boost
in your XML processing in lower markup ratio situations by enabling the
http://xmlns.perl.org/sax/join-character-data feature of this parser. This is
done via the set_feature method like this:
$p->set_feature('http://xmlns.perl.org/sax/join-character-data', 1);
You can also specify a 0 to disable. The default is to have this feature
disabled.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,335 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Builder;
use strict;
use warnings;
use XML::LibXML;
use XML::NamespaceSupport;
use vars qw ($VERSION);
# Thread-cloning hook. Reports 1 (objects of this class are not cloned
# into a new thread) unless XML::LibXML's thread-sharing flag is set, in
# which case it reports 0.
sub CLONE_SKIP {
    return 0 if $XML::LibXML::__threads_shared;
    return 1;
}
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Constructor. The key/value pairs passed after the class name become the
# initial fields of the builder (start_document() reads Encoding and
# Version from them).
sub new {
    my ($class, @fields) = @_;
    my $self = {@fields};
    return bless $self, $class;
}
# Returns the tree stored by the most recent call to done().
sub result {
    my ($self) = @_;
    return $self->{LAST_DOM};
}
# Finishes the current build: picks the document (or, when only a chunk
# was parsed, the node held in Parent), clears the per-parse state, stores
# the tree as LAST_DOM and returns it.
sub done {
    my $self = shift;

    # Without a document the builder was parsing a document chunk.
    my $tree = defined $self->{DOM} ? $self->{DOM} : $self->{Parent};

    for my $field (qw(NamespaceStack Parent DOM)) {
        delete $self->{$field};
    }

    $self->{LAST_DOM} = $tree;
    return $tree;
}
# Locator events are accepted and ignored by the builder.
sub set_document_locator {
    return;
}
# Creates the external DTD subset on the document being built. Nothing is
# done unless the event carries a Name and at least one of SystemId and
# PublicId.
sub start_dtd {
    my ($self, $dtd) = @_;

    return unless defined $dtd->{Name};
    return unless defined $dtd->{SystemId} or defined $dtd->{PublicId};

    return $self->{DOM}->createExternalSubset(
        $dtd->{Name},
        $dtd->{PublicId},
        $dtd->{SystemId},
    );
}
# End-of-DTD events need no action.
sub end_dtd {
    return;
}
# Starts a new build: creates an empty document, applies the XML
# declaration when an Encoding was given to the builder, and resets the
# namespace stack and the current parent node.
sub start_document {
    my ($self, $doc) = @_;

    $self->{DOM} = XML::LibXML::Document->createDocument();

    if ( defined $self->{Encoding} ) {
        my %decl = (
            Version  => ( $self->{Version} || '1.0' ),
            Encoding => $self->{Encoding},
        );
        $self->xml_decl( \%decl );
    }

    my $ns_stack = XML::NamespaceSupport->new;
    $self->{NamespaceStack} = $ns_stack;
    $ns_stack->push_context;

    $self->{Parent} = undef;
    return ();
}
# Copies the Version and Encoding of an XML declaration event onto the
# document being built; fields that are not defined are left untouched.
sub xml_decl {
    my ($self, $decl) = @_;

    $self->{DOM}->setVersion( $decl->{Version} )
        if defined $decl->{Version};
    $self->{DOM}->setEncoding( $decl->{Encoding} )
        if defined $decl->{Encoding};

    return ();
}
# Ends the build by delegating to done() and returns its result.
sub end_document {
    my ($self, $doc) = @_;
    return scalar $self->done();
}
# Declares a namespace prefix on the namespace stack and records that the
# event source reports prefix mappings (USENAMESPACESTACK). When neither a
# document nor a parent node exists yet, a document fragment and a fresh
# namespace stack are created first.
sub start_prefix_mapping {
    my ($self, $ns) = @_;

    if ( not defined $self->{DOM} and not defined $self->{Parent} ) {
        $self->{Parent}         = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    $self->{USENAMESPACESTACK} = 1;
    $self->{NamespaceStack}->declare_prefix( $ns->{Prefix}, $ns->{NamespaceURI} );
    return ();
}
# Removes the declaration of the event's prefix from the namespace stack.
sub end_prefix_mapping {
    my ($self, $ns) = @_;
    my $stack = $self->{NamespaceStack};
    $stack->undeclare_prefix( $ns->{Prefix} );
    return ();
}
# Creates the element described by the SAX event $el, attaches it to the
# tree and makes it the current parent.
#
#   $el->{Name}          qualified element name
#   $el->{NamespaceURI}  namespace of the element (may be undef or empty)
#   $el->{Attributes}    hash of attributes; each value is either a hash
#                        with Name/Value/NamespaceURI/Prefix/LocalName or
#                        a plain string (the hash key is then the name)
#
# Returns the empty list.
sub start_element {
    my ($self, $el) = @_;
    my $node;

    # Neither a document nor an open node: a chunk is being parsed, so the
    # nodes are collected in a document fragment.
    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent}         = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    if ( defined $self->{Parent} ) {
        $el->{NamespaceURI} ||= "";
        $node = $self->{Parent}->addNewChild( $el->{NamespaceURI},
                                              $el->{Name} );
    }
    else {
        # Parent is undefined here, so the guard above guarantees that a
        # document exists: this element becomes the document element.
        if ( $el->{NamespaceURI} ) {
            $node = $self->{DOM}->createRawElementNS( $el->{NamespaceURI},
                                                      $el->{Name} );
        }
        else {
            $node = $self->{DOM}->createRawElement( $el->{Name} );
        }
        $self->{DOM}->setDocumentElement($node);
    }

    # build namespaces
    my $skip_ns = 0;
    foreach my $p ( $self->{NamespaceStack}->get_declared_prefixes() ) {
        $skip_ns = 1;
        my $uri = $self->{NamespaceStack}->get_uri($p);

        # The element's own namespace is not declared a second time.
        next if defined $uri
            and defined $el->{NamespaceURI}
            and $uri eq $el->{NamespaceURI};

        $node->setNamespace( $uri, $p, 0 );
    }

    $self->{Parent} = $node;
    $self->{NamespaceStack}->push_context;

    # do attributes
    foreach my $key ( keys %{ $el->{Attributes} } ) {
        my $attr = $el->{Attributes}->{$key};
        if ( ref($attr) ) {
            # catch broken name/value pairs
            next unless $attr->{Name};

            # Declarations were already taken from the namespace stack.
            next if $self->{USENAMESPACESTACK}
                and ( $attr->{Name} eq "xmlns"
                      or ( defined $attr->{Prefix}
                           and $attr->{Prefix} eq "xmlns" ) );

            if ( defined $attr->{Prefix}
                 and $attr->{Prefix} eq "xmlns" and $skip_ns == 0 ) {
                # ok, the generator does not set namespaces correctly!
                my $uri = $attr->{Value};

                # Compare on defined strings so that a missing value or a
                # missing element namespace does not raise an
                # "uninitialized value" warning.
                my $decl_uri = defined $uri ? $uri : "";
                my $el_uri   = defined $el->{NamespaceURI}
                             ? $el->{NamespaceURI}
                             : "";

                $node->setNamespace( $uri,
                                     $attr->{LocalName},
                                     $decl_uri eq $el_uri ? 1 : 0 );
            }
            else {
                $node->setAttributeNS( $attr->{NamespaceURI} || "",
                                       $attr->{Name}, $attr->{Value} );
            }
        }
        else {
            $node->setAttribute( $key => $attr );
        }
    }
    return ();
}
# Closes the current element: pops one namespace context and moves the
# current parent up to its parent node. Does nothing when no element is
# open.
sub end_element {
    my ($self, $el) = @_;

    my $current = $self->{Parent};
    return unless $current;

    $self->{NamespaceStack}->pop_context;
    $self->{Parent} = $current->parentNode();
    return ();
}
# Marks the start of a CDATA section; characters() consults this flag.
sub start_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 1;
    return ();
}
# Marks the end of a CDATA section by resetting the flag to 0.
sub end_cdata {
    my ($self) = @_;
    $self->{IN_CDATA} = 0;
    return ();
}
# Adds character data to the current parent node. Inside a CDATA section
# (IN_CDATA == 1) a CDATA node is added; otherwise the data is added as
# text. Events without Data, and events arriving while no parent node is
# open, are ignored.
sub characters {
    my ($self, $chars) = @_;

    # Bare chunk without document: collect the nodes in a fragment.
    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent}         = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    return unless $self->{Parent};
    return unless defined $chars and defined $chars->{Data};

    my $in_cdata = ( defined $self->{IN_CDATA} and $self->{IN_CDATA} == 1 );
    my $have_dom = defined $self->{DOM};

    # Plain text inside a document is appended directly to the parent.
    if ( $have_dom and not $in_cdata ) {
        $self->{Parent}->appendText( $chars->{Data} );
        return;
    }

    my $node;
    if ( not $in_cdata ) {
        $node = XML::LibXML::Text->new( $chars->{Data} );
    }
    elsif ($have_dom) {
        $node = $self->{DOM}->createCDATASection( $chars->{Data} );
    }
    else {
        $node = XML::LibXML::CDATASection->new( $chars->{Data} );
    }

    $self->{Parent}->addChild($node);
    return ();
}
# Adds a comment node for the event's Data to the current parent, or to
# the document itself when no element is open. Events without Data are
# ignored.
sub comment {
    my ($self, $chars) = @_;

    # Bare chunk without document: collect the nodes in a fragment.
    unless ( defined $self->{DOM} or defined $self->{Parent} ) {
        $self->{Parent}         = XML::LibXML::DocumentFragment->new();
        $self->{NamespaceStack} = XML::NamespaceSupport->new;
        $self->{NamespaceStack}->push_context;
    }

    return unless defined $chars and defined $chars->{Data};

    my $comment = defined $self->{DOM}
                ? $self->{DOM}->createComment( $chars->{Data} )
                : XML::LibXML::Comment->new( $chars->{Data} );

    my $target = defined $self->{Parent} ? $self->{Parent} : $self->{DOM};
    $target->addChild($comment);
    return ();
}
# Adds a processing instruction (Target, Data) to the current parent, or
# to the document when no element is open. Ignored when there is no
# document.
sub processing_instruction {
    my ($self, $pi) = @_;

    return unless defined $self->{DOM};

    my $pi_node = $self->{DOM}->createPI( $pi->{Target}, $pi->{Data} );
    my $target  = defined $self->{Parent} ? $self->{Parent} : $self->{DOM};
    $target->addChild($pi_node);
    return ();
}
# Non-fatal problem: the error is thrown inside an eval so that $@ is
# filled without the exception propagating to the caller.
sub warning {
    my ($self, $error) = @_;
    eval { $error->throw; };
}
# Error event: discards the partially built tree and the namespace stack,
# then rethrows the error.
sub error {
    my ($self, $error) = @_;

    for my $field (qw(NamespaceStack Parent DOM)) {
        delete $self->{$field};
    }

    $error->throw;
}
# Fatal error event: drops the namespace stack, the current parent and the
# document under construction before rethrowing the error.
sub fatal_error {
    my ($self, $error) = @_;

    delete $self->{NamespaceStack};
    delete @{$self}{qw(Parent DOM)};

    $error->throw;
}
1;
__END__

View File

@@ -0,0 +1,58 @@
=head1 NAME
XML::LibXML::SAX::Builder - Building DOM trees from SAX events.
=head1 SYNOPSIS
use XML::LibXML::SAX::Builder;
my $builder = XML::LibXML::SAX::Builder->new();
my $gen = XML::Generator::DBI->new(Handler => $builder, dbh => $dbh);
$gen->execute("SELECT * FROM Users");
my $doc = $builder->result();
=head1 DESCRIPTION
This is a SAX handler that generates a DOM tree from SAX events. Usage is as
above. Input is accepted from any SAX1 or SAX2 event generator.
Building DOM trees from SAX events is quite easy with
XML::LibXML::SAX::Builder. The class is designed as a SAX2 final handler not as
a filter!
Since SAX is strictly stream oriented, you should not expect anything to return
from a generator. Instead you have to ask the builder instance directly to get
the document built. XML::LibXML::SAX::Builder's result() function holds the
document generated from the last SAX stream.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,158 @@
# $Id: Generator.pm 772 2009-01-23 21:42:09Z pajas
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Generator;
use strict;
use warnings;
use XML::LibXML;
use vars qw ($VERSION);
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# Thread-cloning hook: 0 when XML::LibXML's thread-sharing flag is set,
# 1 otherwise.
sub CLONE_SKIP {
    my $skip = 1;
    $skip = 0 if $XML::LibXML::__threads_shared;
    return $skip;
}
# Top-level statement: runs when this file is loaded and announces that the
# class is deprecated.
warn("This class (", __PACKAGE__, ") is deprecated!");
# Constructor. Accepts either a list of key/value pairs or a single
# argument, which is then stored under the key 'Handler'.
sub new {
    my ($class, @args) = @_;

    @args = ( 'Handler', $args[0] ) if @args == 1;

    my %fields = @args;
    return bless \%fields, $class;
}
# Sends the SAX events for $node to the configured Handler, wrapped in
# start_document/end_document events.
sub generate {
    my ($self, $node) = @_;

    my %document = ( Parent => undef );

    $self->{Handler}->start_document( \%document );
    process_node( $self->{Handler}, $node );
    $self->{Handler}->end_document( \%document );
}
# Dispatches on the type of $node and sends the matching events to
# $handler:
#   - element: handled by process_element()
#   - text and CDATA: characters event
#   - comment: comment event
#   - entity reference: each child is processed in turn
#   - document: only the first element child is processed
# Any other type produces a warning.
sub process_node {
    my ($handler, $node) = @_;
    my $type = $node->getType();

    if ( $type == XML_ELEMENT_NODE ) {
        process_element( $handler, $node );
    }
    elsif ( $type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE ) {
        $handler->characters( { Data => $node->getData } );
    }
    elsif ( $type == XML_COMMENT_NODE ) {
        $handler->comment( { Data => $node->getData } );
    }
    elsif ( $type == XML_ENTITY_REF_NODE ) {
        for my $child ( $node->getChildnodes ) {
            process_node( $handler, $child );
        }
    }
    elsif ( $type == XML_DOCUMENT_NODE ) {
        # just get root element. Ignore other cruft.
        for my $child ( $node->getChildnodes ) {
            next unless $child->getType() == XML_ELEMENT_NODE;
            process_element( $handler, $child );
            last;
        }
    }
    else {
        warn("unknown node type: $type");
    }
}
# Sends start_element, the events of all child nodes, and end_element for
# $element. Attributes are passed as XML::LibXML::SAX::AttributeNode
# objects keyed by attribute name.
sub process_element {
    my ($handler, $element) = @_;

    my %attributes;
    for my $attr ( $element->getAttributes ) {
        my $sax_attr = XML::LibXML::SAX::AttributeNode->new(
            Name         => $attr->getName,
            Value        => $attr->getData,
            NamespaceURI => $attr->getNamespaceURI,
            Prefix       => $attr->getPrefix,
            LocalName    => $attr->getLocalName,
        );
        $attributes{ $sax_attr->{Name} } = $sax_attr;
    }

    my $event = {
        Name         => $element->getName,
        Attributes   => \%attributes,
        NamespaceURI => $element->getNamespaceURI,
        Prefix       => $element->getPrefix,
        LocalName    => $element->getLocalName,
    };

    $handler->start_element($event);
    for my $kid ( $element->getChildnodes ) {
        process_node( $handler, $kid );
    }
    $handler->end_element($event);
}
# Attribute record handed to SAX handlers. It is a hash of the fields
# given to new(); in string context it evaluates to its Value field.
package XML::LibXML::SAX::AttributeNode;
use overload '""' => "stringify";

# Constructor: blesses the given key/value pairs.
sub new {
    my ($class, @pairs) = @_;
    my %fields = @pairs;
    return bless \%fields, $class;
}

# String overload: the attribute value.
sub stringify {
    my ($self) = @_;
    return $self->{Value};
}
1;
__END__
=head1 NAME
XML::LibXML::SAX::Generator - Generate SAX events from a LibXML tree
=head1 SYNOPSIS
my $handler = MySAXHandler->new();
my $generator = XML::LibXML::SAX::Generator->new(Handler => $handler);
my $dom = XML::LibXML->new->parse_file("foo.xml");
$generator->generate($dom);
=head1 DESCRIPTION
THIS CLASS IS DEPRECATED! Use XML::LibXML::SAX::Parser instead!
This helper class allows you to generate SAX events from any XML::LibXML
node, and all its sub-nodes. This basically gives you interop from
XML::LibXML to other modules that may implement SAX.
It uses SAX2 style, but should be compatible with anything SAX1, by use
of stringification overloading.
There is nothing to really know about, beyond the synopsis above, and
a general knowledge of how to use SAX, which is beyond the scope here.
=cut

View File

@@ -0,0 +1,266 @@
# $Id$
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::SAX::Parser;
use strict;
use warnings;
use vars qw($VERSION @ISA);
use XML::LibXML;
use XML::LibXML::Common qw(:libxml);
use XML::SAX::Base;
use XML::SAX::DocumentLocator;
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
@ISA = ('XML::SAX::Base');
# Thread-cloning hook: objects are skipped (1) unless XML::LibXML's
# thread-sharing flag is set (0).
sub CLONE_SKIP {
    if ($XML::LibXML::__threads_shared) {
        return 0;
    }
    return 1;
}
# Character streams are not supported: always dies.
sub _parse_characterstream {
    my $self = shift;
    my ($fh, $options) = @_;
    die "parsing a characterstream is not supported at this time";
}
# Parses the byte stream $fh into a DOM with a fresh XML::LibXML parser
# and replays it as SAX events. The source's SystemId, when present in the
# options, is passed to the parser as second argument.
sub _parse_bytestream {
    my ($self, $fh, $options) = @_;

    my $parser = XML::LibXML->new();

    my @parse_args = ($fh);
    push @parse_args, $options->{Source}{SystemId}
        if exists $options->{Source}{SystemId};

    my $doc = $parser->parse_fh(@parse_args);
    $self->generate($doc);
}
# Parses the XML string $str into a DOM with a fresh XML::LibXML parser
# and replays it as SAX events. The source's SystemId, when present in the
# options, is passed to the parser as second argument.
sub _parse_string {
    my ($self, $str, $options) = @_;

    my $parser = XML::LibXML->new();

    my $doc;
    if ( exists $options->{Source}{SystemId} ) {
        $doc = $parser->parse_string( $str, $options->{Source}{SystemId} );
    }
    else {
        $doc = $parser->parse_string($str);
    }

    $self->generate($doc);
}
# Parses the file named by $sysid into a DOM with a fresh XML::LibXML
# parser and replays it as SAX events.
sub _parse_systemid {
    my ($self, $sysid, $options) = @_;
    my $doc = XML::LibXML->new()->parse_file($sysid);
    $self->generate($doc);
}
# Replays $node as SAX events. A document locator is installed first; its
# public/system id come from the external subset of the owner document
# and its encoding and XML version from the document itself. Events are
# only generated when $node is an XML or HTML document node.
sub generate {
    my ($self, $node) = @_;
    my $doc = $node->ownerDocument();

    # precompute some DocumentLocator values
    my ( $public_id, $system_id, $encoding, $xml_version );
    if ( defined $doc ) {
        my $dtd = $doc->externalSubset();
        if ( defined $dtd ) {
            $public_id = $dtd->publicId();
            $system_id = $dtd->systemId();
        }
        $encoding    = $doc->encoding();
        $xml_version = $doc->version();
    }

    # The line number is looked up lazily from the node being processed;
    # the column is always reported as 1.
    my $line_number = sub {
        return undef unless defined $self->{current_node};
        return $self->{current_node}->line_number();
    };

    $self->set_document_locator(
        XML::SAX::DocumentLocator->new(
            sub { $public_id },
            sub { $system_id },
            $line_number,
            sub { 1 },
            sub { $encoding },
            sub { $xml_version },
        ),
    );

    my $type = $node->nodeType();
    if ( $type == XML_DOCUMENT_NODE || $type == XML_HTML_DOCUMENT_NODE ) {
        $self->start_document( {} );
        $self->xml_decl(
            {
                Version  => $node->getVersion,
                Encoding => $node->getEncoding,
            }
        );
        $self->process_node($node);
        $self->end_document( {} );
    }
}
# Generates the SAX events for $node and, recursively, for its children.
# While a node is being processed it is available as
# $self->{current_node} (restored on exit via local), which the document
# locator uses to report line numbers.
#
#   - comment: comment event
#   - text and CDATA: characters event
#   - element: process_element()
#   - entity reference, document, HTML document, document fragment:
#     every child is processed in turn
#   - processing instruction: processing_instruction event
#   - XInclude markers, DTD nodes and all other types are ignored
sub process_node {
    my ($self, $node) = @_;
    local $self->{current_node} = $node;
    my $node_type = $node->nodeType();

    if ( $node_type == XML_COMMENT_NODE ) {
        $self->comment( { Data => $node->getData } );
    }
    elsif ( $node_type == XML_TEXT_NODE
            || $node_type == XML_CDATA_SECTION_NODE ) {
        $self->characters( { Data => $node->nodeValue } );
    }
    elsif ( $node_type == XML_ELEMENT_NODE ) {
        $self->process_element($node);
    }
    elsif ( $node_type == XML_ENTITY_REF_NODE ) {
        foreach my $kid ( $node->childNodes ) {
            $self->process_node($kid);
        }
    }
    elsif ( $node_type == XML_DOCUMENT_NODE
            || $node_type == XML_HTML_DOCUMENT_NODE
            || $node_type == XML_DOCUMENT_FRAG_NODE ) {
        # sometimes it is just useful to generate SAX events from
        # a document fragment (very good with filters).
        foreach my $kid ( $node->childNodes ) {
            $self->process_node($kid);
        }
    }
    elsif ( $node_type == XML_PI_NODE ) {
        $self->processing_instruction(
            { Target => $node->getName, Data => $node->getData } );
    }
    elsif ( $node_type == XML_XINCLUDE_START
            || $node_type == XML_XINCLUDE_END ) {
        # ignore!
        # i may want to handle this one day, dunno yet
    }
    elsif ( $node_type == XML_DTD_NODE ) {
        # ignore!
        # i will support DTDs, but had no time yet.
    }
    else {
        # unsupported node types are skipped silently
    }
}
# Generates the SAX events for one element, in this order:
#   1. start_prefix_mapping for every namespace declared on the element
#   2. start_element, with the attributes keyed as "{namespace-uri}local"
#   3. the events of all child nodes
#   4. end_element (same data as start_element, minus Attributes)
#   5. end_prefix_mapping for every namespace declared on the element
# Namespace declarations also appear among the attributes, as "xmlns" or
# "xmlns:prefix" entries.
sub process_element {
    my ($self, $element) = @_;

    # Payload shared by the start and end prefix-mapping events.
    my $mapping_for = sub {
        my ($decl) = @_;
        return {
            NamespaceURI => $decl->href,
            Prefix       => ( defined $decl->localname ? $decl->localname : '' ),
        };
    };

    my @declarations = $element->getNamespaces;
    for my $decl (@declarations) {
        $self->start_prefix_mapping( $mapping_for->($decl) );
    }

    my %sax_attrs;
    for my $attr ( $element->attributes ) {

        if ( !$attr->isa('XML::LibXML::Namespace') ) {
            my $uri = $attr->namespaceURI;
            $uri = '' unless defined $uri;

            ## Not sure why, but $attr->name is coming through stripped
            ## of its prefix, so we need to hand-assemble a real name.
            my $qname = $attr->name;
            $qname = "" unless defined $qname;
            my $prefix = $attr->prefix;
            $prefix = "" unless defined $prefix;
            if ( index( $qname, ":" ) < 0 && length $prefix ) {
                $qname = "$prefix:$qname";
            }

            $sax_attrs{ "{$uri}" . $attr->localname } = {
                Name         => $qname,
                Value        => $attr->value,
                NamespaceURI => $uri,
                Prefix       => $prefix,
                LocalName    => $attr->localname,
            };
            next;
        }

        # TODO This needs fixing modulo agreeing on what
        # is the right thing to do here.
        if ( defined $attr->name ) {
            my $xmlns_uri = "http://www.w3.org/2000/xmlns/";
            my $local     = $attr->localname;
            my $key       = "{$xmlns_uri}";
            my $qname     = "xmlns";
            if ( defined $local ) {
                $key   .= $local;
                $qname .= ":" . $local;
            }
            $sax_attrs{$key} = {
                Name         => $qname,
                Value        => $attr->href,
                NamespaceURI => $xmlns_uri,
                Prefix       => "xmlns",
                LocalName    => $local,
            };
        }
        else {
            ## It's an attr like "xmlns='foo'"
            $sax_attrs{"{}xmlns"} = {
                Name         => "xmlns",
                LocalName    => "xmlns",
                Prefix       => "",
                Value        => $attr->href,
                NamespaceURI => "",
            };
        }
    }

    my $start_event = {
        Name         => $element->nodeName,
        Attributes   => \%sax_attrs,
        NamespaceURI => $element->namespaceURI,
        Prefix       => $element->prefix || "",
        LocalName    => $element->localname,
    };
    $self->start_element($start_event);

    for my $kid ( $element->childNodes ) {
        $self->process_node($kid);
    }

    # The end event is a copy without the attributes.
    my %end_event = %$start_event;
    delete $end_event{Attributes};
    $self->end_element( \%end_event );

    for my $decl (@declarations) {
        $self->end_prefix_mapping( $mapping_for->($decl) );
    }
}
1;
__END__

View File

@@ -0,0 +1,89 @@
=head1 NAME
XML::LibXML::Schema - XML Schema Validation
=head1 SYNOPSIS
use XML::LibXML;
$doc = XML::LibXML->new->parse_file($url);
$xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 );
$xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 );
eval { $xmlschema->validate( $doc ); };
=head1 DESCRIPTION
The XML::LibXML::Schema class is a tiny frontend to libxml2's XML Schema
implementation. Currently it supports only schema parsing and document
validation. As of 2.6.32, libxml2 only supports decimal types up to 24 digits
(the standard requires at least 18).
=head1 METHODS
=over 4
=item new
$xmlschema = XML::LibXML::Schema->new( location => $filename_or_url, no_network => 1 );
$xmlschema = XML::LibXML::Schema->new( string => $xmlschemastring, no_network => 1 );
The constructor of XML::LibXML::Schema needs to be called with a list of
parameters. At least the location or the string parameter is required to specify
the source of the schema. Setting the optional parameter no_network to 1 prevents
the parser from accessing the network, and setting the optional parameter recover
to 1 prevents the parser from calling die() on errors.
It is important that each schema has only a single source.
The location parameter allows one to parse a schema from the filesystem or a
(non-HTTPS) URL.
The string parameter will parse the schema from the given XML string.
Note that the constructor will die() if the schema does not meet the
constraints of the XML Schema specification.
=item validate
eval { $xmlschema->validate( $doc ); };
This function allows one to validate a (parsed) document against the given XML
Schema. The argument of this function should be a L<<<<<< XML::LibXML::Document >>>>>> object. If this function succeeds, it will return 0, otherwise it will die()
and report the errors found. Because of this, validate() should always be
called inside an eval block.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,190 @@
=head1 NAME
XML::LibXML::Text - XML::LibXML Class for Text Nodes
=head1 SYNOPSIS
use XML::LibXML;
# Only methods specific to Text nodes are listed here,
# see the XML::LibXML::Node manpage for other methods
$text = XML::LibXML::Text->new( $content );
$nodedata = $text->data;
$text->setData( $text_content );
$text->substringData($offset, $length);
$text->appendData( $somedata );
$text->insertData($offset, $string);
$text->deleteData($offset, $length);
$text->deleteDataString($remstring, $all);
$text->replaceData($offset, $length, $string);
$text->replaceDataString($old, $new, $flag);
$text->replaceDataRegEx( $search_cond, $replace_cond, $reflags );
=head1 DESCRIPTION
Unlike the DOM specification, XML::LibXML implements the text node as the base
class of all character data nodes. Therefore there exists no CharacterData
class. This allows one to apply methods of text nodes also to Comments and
CDATA-sections.
=head1 METHODS
The class inherits from L<<<<<< XML::LibXML::Node >>>>>>. The documentation for Inherited methods is not listed here.
Many functions listed here are extensively documented in the DOM Level 3 specification (L<<<<<< http://www.w3.org/TR/DOM-Level-3-Core/ >>>>>>). Please refer to the specification for extensive documentation.
=over 4
=item new
$text = XML::LibXML::Text->new( $content );
The constructor of the class. It creates an unbound text node.
=item data
$nodedata = $text->data;
Although there exists the C<<<<<< nodeValue >>>>>> attribute in the Node class, the DOM specification defines data as a separate
attribute. C<<<<<< XML::LibXML >>>>>> implements these two attributes not as different attributes, but as aliases,
such as C<<<<<< libxml2 >>>>>> does. Therefore
$text->data;
and
$text->nodeValue;
will have the same result and are not different entities.
=item setData($string)
$text->setData( $text_content );
This function sets or replaces text content to a node. The node has to be of
the type "text", "cdata" or "comment".
=item substringData($offset,$length)
$text->substringData($offset, $length);
Extracts a range of data from the node. (DOM Spec) This function takes the two
parameters $offset and $length and returns the sub-string, if available.
If the node contains no data or $offset refers to a non-existing string index,
this function will return I<<<<<< undef >>>>>>. If $length is out of range C<<<<<< substringData >>>>>> will return the data starting at $offset instead of causing an error.
=item appendData($string)
$text->appendData( $somedata );
Appends a string to the end of the existing data. If the current text node
contains no data, this function has the same effect as C<<<<<< setData >>>>>>.
=item insertData($offset,$string)
$text->insertData($offset, $string);
Inserts the parameter $string at the given $offset of the existing data of the
node. This operation will not remove existing data, but change the order of the
existing data.
The $offset has to be a positive value. If $offset is out of range, C<<<<<< insertData >>>>>> will have the same behaviour as C<<<<<< appendData >>>>>>.
=item deleteData($offset, $length)
$text->deleteData($offset, $length);
This method removes a chunk from the existing node data at the given offset.
The $length parameter tells, how many characters should be removed from the
string.
=item deleteDataString($string, [$all])
$text->deleteDataString($remstring, $all);
This method removes a chunk from the existing node data. Since the DOM spec is
quite unhandy if you already know C<<<<<< which >>>>>> string to remove from a text node, this method allows more perlish code :)
The function takes two parameters: I<<<<<< $string >>>>>> and, optionally, the I<<<<<< $all >>>>>> flag. If $all is not set, I<<<<<< undef >>>>>> or I<<<<<< 0 >>>>>>, C<<<<<< deleteDataString >>>>>> will remove only the first occurrence of $string. If $all is I<<<<<< TRUE >>>>>>, C<<<<<< deleteDataString >>>>>> will remove all occurrences of I<<<<<< $string >>>>>> from the node data.
=item replaceData($offset, $length, $string)
$text->replaceData($offset, $length, $string);
The DOM style version to replace node data.
=item replaceDataString($oldstring, $newstring, [$all])
$text->replaceDataString($old, $new, $flag);
The more programmer friendly version of replaceData() :)
Instead of giving offsets and length one can specify the exact string (I<<<<<< $oldstring >>>>>>) to be replaced. Additionally the I<<<<<< $all >>>>>> flag allows one to replace all occurrences of I<<<<<< $oldstring >>>>>>.
=item replaceDataRegEx( $search_cond, $replace_cond, $reflags )
$text->replaceDataRegEx( $search_cond, $replace_cond, $reflags );
This method replaces the node's data by a C<<<<<< simple >>>>>> regular expression. Optionally, this function allows one to pass some flags that
will be added as flag to the replace statement.
I<<<<<< NOTE: >>>>>> This is a shortcut for
my $datastr = $node->getData();
$datastr =~ s/somecond/replacement/g; # 'g' is just an example for any flag
$node->setData( $datastr );
This function can make things easier to read for simple replacements. For more
complex variants it is recommended to use the code snippet above.
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,147 @@
# $Id: XPathContext.pm 422 2002-11-08 17:10:30Z phish $
#
# This is free software, you may use it and distribute it under the same terms as
# Perl itself.
#
# Copyright 2001-2003 AxKit.com Ltd., 2002-2006 Christian Glahn, 2006-2009 Petr Pajas
#
#
package XML::LibXML::XPathContext;
use strict;
use warnings;
use vars qw($VERSION @ISA $USE_LIBXML_DATA_TYPES);
use Carp;
use XML::LibXML;
use XML::LibXML::NodeList;
$VERSION = "2.0206"; # VERSION TEMPLATE: DO NOT CHANGE
# should LibXML XPath data types be used for simple objects
# when passing parameters to extension functions (default: no)
$USE_LIBXML_DATA_TYPES = 0;
# Thread-cloning hook: XPath contexts are never cloned into new threads.
sub CLONE_SKIP {
    return 1;
}
# Evaluates $xpath (optionally relative to $node) and returns the matching
# nodes: as a list in list context, otherwise wrapped in an
# XML::LibXML::NodeList.
sub findnodes {
    my ($self, $xpath, $node) = @_;
    my @nodes = $self->_guarded_find_call( '_findnodes', $node, $xpath );
    return wantarray ? @nodes : XML::LibXML::NodeList->new(@nodes);
}
# Evaluates $xpath (optionally relative to $node). The low-level call
# yields a result class name followed by constructor arguments; the
# result object is built from them. Returns undef when no class name
# comes back.
sub find {
    my ($self, $xpath, $node) = @_;
    my ( $type, @params ) = $self->_guarded_find_call( '_find', $node, $xpath, 0 );
    return undef unless $type;
    return $type->new(@params);
}
# Evaluates $xpath (optionally relative to $node) with the low-level
# existence flag set and returns the second element of the result.
sub exists {
    my ($self, $xpath, $node) = @_;
    my @result = $self->_guarded_find_call( '_find', $node, $xpath, 1 );
    return $result[1];
}
# Evaluates the expression with find() and returns the value of the
# result's literal form.
sub findvalue {
    my ($self, @args) = @_;
    my $result = $self->find(@args);
    return $result->to_literal->value;
}
# Calls the low-level method $method with the remaining arguments and
# returns its result list.
#
# When $node is a reference it becomes the context node for the duration
# of the call and the previous context node is restored afterwards. The
# node pool is freed after every call, whether or not it succeeded. An
# exception raised by the call is rethrown with croak once this cleanup
# has run.
sub _guarded_find_call {
    my ($self, $method, $node) = (shift, shift, shift);

    my $prev_node;
    if ( ref($node) ) {
        $prev_node = $self->getContextNode();
        $self->setContextNode($node);
    }

    my @ret;
    eval {
        @ret = $self->$method(@_);
    };
    # Save the error right away: the cleanup calls below may run code that
    # resets $@, which would make the failure go unnoticed.
    my $err = $@;

    $self->_free_node_pool;
    $self->setContextNode($prev_node) if ref($node);

    if ($err) {
        chomp $err;
        croak $err;
    }
    return @ret;
}
# Registers the extension function $name without a namespace by
# delegating to registerFunctionNS with an undefined namespace URI.
sub registerFunction {
    my $self = shift;
    my ($name, $sub) = @_;
    $self->registerFunctionNS( $name, undef, $sub );
    return;
}
# Unregisters $prefix by registering it with an undefined namespace URI.
sub unregisterNs {
    my $self = shift;
    my ($prefix) = @_;
    $self->registerNs( $prefix, undef );
    return;
}
# Unregisters the namespace-less extension function $name by registering
# it with neither a namespace URI nor a callback.
sub unregisterFunction {
    my $self = shift;
    my ($name) = @_;
    $self->registerFunctionNS( $name, undef, undef );
    return;
}
# Unregisters the extension function $name in namespace $ns by
# registering it without a callback.
sub unregisterFunctionNS {
    my $self = shift;
    my ($name, $ns) = @_;
    $self->registerFunctionNS( $name, $ns, undef );
    return;
}
# Removes the variable lookup function and its data by registering
# undef for both.
sub unregisterVarLookupFunc {
    my $self = shift;
    $self->registerVarLookupFunc( undef, undef );
    return;
}
# extension function perl dispatcher
# borrowed from XML::LibXSLT
#
# Calls the Perl extension function $func with the XPath arguments that
# follow it and returns the function's (scalar) result.
#
# The arguments arrive as a flat list of records:
#   - 'XML::LibXML::Literal' | 'XML::LibXML::Number' | 'XML::LibXML::Boolean'
#     followed by one value; passed on as the plain value, or as an object
#     of that class when $USE_LIBXML_DATA_TYPES is true
#   - 'XML::LibXML::NodeList' followed by a node count and that many
#     nodes; passed on as a single NodeList object
# Each converted argument is put in front of the ones converted before it,
# so the function receives them in the reverse of the incoming order.
#
# $func is either a code reference or a function name; a name without a
# package qualifier is looked up in main::.
sub _perl_dispatcher {
    my $func = shift;
    my @params = @_;
    my @perlParams;

    while (@params) {
        my $type = shift(@params);
        if ($type eq 'XML::LibXML::Literal' or
            $type eq 'XML::LibXML::Number' or
            $type eq 'XML::LibXML::Boolean')
        {
            my $val = shift(@params);
            unshift(@perlParams, $USE_LIBXML_DATA_TYPES ? $type->new($val) : $val);
        }
        elsif ($type eq 'XML::LibXML::NodeList') {
            my $node_count = shift(@params);
            unshift(@perlParams, $type->new(splice(@params, 0, $node_count)));
        }
    }

    $func = "main::$func" unless ref($func) || $func =~ /.+::/;

    # $func may be a function name, so a symbolic call must be allowed.
    no strict 'refs';
    my $res = $func->(@perlParams);
    return $res;
}
1;

View File

@@ -0,0 +1,382 @@
=head1 NAME
XML::LibXML::XPathContext - XPath Evaluation
=head1 SYNOPSIS
my $xpc = XML::LibXML::XPathContext->new();
my $xpc = XML::LibXML::XPathContext->new($node);
$xpc->registerNs($prefix, $namespace_uri)
$xpc->unregisterNs($prefix)
$uri = $xpc->lookupNs($prefix)
$xpc->registerVarLookupFunc($callback, $data)
$data = $xpc->getVarLookupData();
$callback = $xpc->getVarLookupFunc();
$xpc->unregisterVarLookupFunc();
$xpc->registerFunctionNS($name, $uri, $callback)
$xpc->unregisterFunctionNS($name, $uri)
$xpc->registerFunction($name, $callback)
$xpc->unregisterFunction($name)
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
$bool = $xpc->exists( $xpath_expression, $context_node );
$xpc->setContextNode($node)
my $node = $xpc->getContextNode;
$xpc->setContextPosition($position)
my $position = $xpc->getContextPosition;
$xpc->setContextSize($size)
my $size = $xpc->getContextSize;
$xpc->setContextNode($node)
=head1 DESCRIPTION
The XML::LibXML::XPathContext class provides an almost complete interface to
libxml2's XPath implementation. With XML::LibXML::XPathContext, it is possible
to evaluate XPath expressions in the context of arbitrary node, context size,
and context position, with a user-defined namespace-prefix mapping, custom
XPath functions written in Perl, and even a custom XPath variable resolver.
=head1 EXAMPLES
=head2 Namespaces
This example demonstrates C<<<<<< registerNs() >>>>>> method. It finds all paragraph nodes in an XHTML document.
my $xc = XML::LibXML::XPathContext->new($xhtml_doc);
$xc->registerNs('xhtml', 'http://www.w3.org/1999/xhtml');
my @nodes = $xc->findnodes('//xhtml:p');
=head2 Custom XPath functions
This example demonstrates C<<<<<< registerFunction() >>>>>> method by defining a function filtering nodes based on a Perl regular
expression:
sub grep_nodes {
my ($nodelist,$regexp) = @_;
my $result = XML::LibXML::NodeList->new;
for my $node ($nodelist->get_nodelist()) {
$result->push($node) if $node->textContent =~ $regexp;
}
return $result;
};
my $xc = XML::LibXML::XPathContext->new($node);
$xc->registerFunction('grep_nodes', \&grep_nodes);
my @nodes = $xc->findnodes('//section[grep_nodes(para,"\bsearch(ing|es)?\b")]');
=head2 Variables
This example demonstrates the C<<<<<< registerVarLookupFunc() >>>>>> method. We use XPath variables to recycle results of previous evaluations:
sub var_lookup {
my ($data,$varname,$ns)=@_;
return $data->{$varname};
}
my $areas = XML::LibXML->new->parse_file('areas.xml');
my $empl = XML::LibXML->new->parse_file('employees.xml');
my $xc = XML::LibXML::XPathContext->new($empl);
my %variables = (
A => $xc->find('/employees/employee[@salary>10000]'),
B => $areas->find('/areas/area[district="Brooklyn"]/street'),
);
# get names of employees from $A working in an area listed in $B
$xc->registerVarLookupFunc(\&var_lookup, \%variables);
my @nodes = $xc->findnodes('$A[work_area/street = $B]/name');
=head1 METHODS
=over 4
=item new
my $xpc = XML::LibXML::XPathContext->new();
Creates a new XML::LibXML::XPathContext object without a context node.
my $xpc = XML::LibXML::XPathContext->new($node);
Creates a new XML::LibXML::XPathContext object with the context node set to C<<<<<< $node >>>>>>.
=item registerNs
$xpc->registerNs($prefix, $namespace_uri)
Registers namespace C<<<<<< $prefix >>>>>> to C<<<<<< $namespace_uri >>>>>>.
=item unregisterNs
$xpc->unregisterNs($prefix)
Unregisters namespace C<<<<<< $prefix >>>>>>.
=item lookupNs
$uri = $xpc->lookupNs($prefix)
Returns namespace URI registered with C<<<<<< $prefix >>>>>>. If C<<<<<< $prefix >>>>>> is not registered to any namespace URI returns C<<<<<< undef >>>>>>.
=item registerVarLookupFunc
$xpc->registerVarLookupFunc($callback, $data)
Registers variable lookup function C<<<<<< $callback >>>>>>. The registered function is executed by the XPath engine each time an XPath
variable is evaluated. It takes three arguments: C<<<<<< $data >>>>>>, variable name, and variable ns-URI and must return one value: a number or
string or any C<<<<<< XML::LibXML:: >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of an L<<<<<< XML::LibXML::NodeList >>>>>>.
=item getVarLookupData
$data = $xpc->getVarLookupData();
Returns the data that have been associated with a variable lookup function
during a previous call to C<<<<<< registerVarLookupFunc >>>>>>.
=item getVarLookupFunc
$callback = $xpc->getVarLookupFunc();
Returns the variable lookup function previously registered with C<<<<<< registerVarLookupFunc >>>>>>.
=item unregisterVarLookupFunc
$xpc->unregisterVarLookupFunc();
Unregisters variable lookup function and the associated lookup data.
=item registerFunctionNS
$xpc->registerFunctionNS($name, $uri, $callback)
Registers an extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. C<<<<<< $callback >>>>>> must be a CODE reference. The arguments of the callback function are either
simple scalars or C<<<<<< XML::LibXML::* >>>>>> objects depending on the XPath argument types. The function is responsible for
checking the argument number and types. Result of the callback code must be a
single value of the following types: a simple scalar (number, string) or an
arbitrary C<<<<<< XML::LibXML::* >>>>>> object that can be a result of findnodes: Boolean, Literal, Number, Node (e.g.
Document, Element, etc.), or NodeList. For convenience, simple (non-blessed)
array references containing only L<<<<<< XML::LibXML::Node >>>>>> objects can be used instead of a L<<<<<< XML::LibXML::NodeList >>>>>>.
=item unregisterFunctionNS
$xpc->unregisterFunctionNS($name, $uri)
Unregisters extension function C<<<<<< $name >>>>>> in C<<<<<< $uri >>>>>> namespace. Has the same effect as passing C<<<<<< undef >>>>>> as C<<<<<< $callback >>>>>> to registerFunctionNS.
=item registerFunction
$xpc->registerFunction($name, $callback)
Same as C<<<<<< registerFunctionNS >>>>>> but without a namespace.
=item unregisterFunction
$xpc->unregisterFunction($name)
Same as C<<<<<< unregisterFunctionNS >>>>>> but without a namespace.
=item findnodes
@nodes = $xpc->findnodes($xpath)
@nodes = $xpc->findnodes($xpath, $context_node )
$nodelist = $xpc->findnodes($xpath, $context_node )
Performs the xpath statement on the current node and returns the result as an
array. In scalar context, returns an L<<<<<< XML::LibXML::NodeList >>>>>> object. Optionally, a node may be passed as a second argument to set the
context node for the query.
The xpath expression can be passed either as a string, or as an L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item find
$object = $xpc->find($xpath )
$object = $xpc->find($xpath, $context_node )
Performs the xpath expression using the current node as the context of the
expression, and returns the result depending on what type of result the XPath
expression had. For example, the XPath C<<<<<< 1 * 3 + 52 >>>>>> results in an L<<<<<< XML::LibXML::Number >>>>>> object being returned. Other expressions might return an L<<<<<< XML::LibXML::Boolean >>>>>> object, or an L<<<<<< XML::LibXML::Literal >>>>>> object (a string). Each of those objects uses Perl's overload feature to ``do
the right thing'' in different contexts. Optionally, a node may be passed as a
second argument to set the context node for the query.
The xpath expression can be passed either as a string, or as an L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item findvalue
$value = $xpc->findvalue($xpath )
$value = $xpc->findvalue($xpath, $context_node )
Is exactly equivalent to:
$xpc->find( $xpath, $context_node )->to_literal;
That is, it returns the literal value of the results. This enables you to
ensure that you get a string back from your search, allowing certain shortcuts.
This could be used as the equivalent of <xsl:value-of select="some_xpath"/>.
Optionally, a node may be passed in the second argument to set the context node
for the query.
The xpath expression can be passed either as a string, or as an L<<<<<< XML::LibXML::XPathExpression >>>>>> object.
=item exists
$bool = $xpc->exists( $xpath_expression, $context_node );
This method behaves like I<<<<<< findnodes >>>>>>, except that it only returns a boolean value (1 if the expression matches a
node, 0 otherwise) and may be faster than I<<<<<< findnodes >>>>>>, because the XPath evaluation may stop early on the first match (this is true
for libxml2 >= 2.6.27).
For XPath expressions that do not return a node-set, the method returns true if
the returned value is a non-zero number or a non-empty string.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=item getContextNode
my $node = $xpc->getContextNode;
Get the current context node.
=item setContextPosition
$xpc->setContextPosition($position)
Set the current context position. By default, this value is -1 (and evaluating
XPath function C<<<<<< position() >>>>>> in the initial context raises an XPath error), but can be set to any value up
to context size. This usually only serves to trick the XPath engine into returning
the given position when the C<<<<<< position() >>>>>> XPath function is called. Setting this value to -1 restores the default
behavior.
=item getContextPosition
my $position = $xpc->getContextPosition;
Get the current context position.
=item setContextSize
$xpc->setContextSize($size)
Set the current context size. By default, this value is -1 (and evaluating
XPath function C<<<<<< last() >>>>>> in the initial context raises an XPath error), but can be set to any
non-negative value. This usually only serves to trick the XPath engine into
returning the given value when the C<<<<<< last() >>>>>> XPath function is called. If context size is set to 0, position is
automatically also set to 0. If context size is positive, position is
automatically set to 1. Setting context size to -1 restores the default
behavior.
=item getContextSize
my $size = $xpc->getContextSize;
Get the current context size.
=item setContextNode
$xpc->setContextNode($node)
Set the current context node.
=back
=head1 BUGS AND CAVEATS
XML::LibXML::XPathContext objects I<<<<<< are >>>>>> reentrant, meaning that you can call methods of an XML::LibXML::XPathContext
even from XPath extension functions registered with the same object or from a
variable lookup function. On the other hand, you should rather avoid
registering new extension functions, namespaces and a variable lookup function
from within extension functions and a variable lookup function, unless you want
to experience untested behavior.
=head1 AUTHORS
Ilya Martynov and Petr Pajas, based on XML::LibXML and XML::LibXSLT code by
Matt Sergeant and Christian Glahn.
=head1 HISTORICAL REMARK
Prior to XML::LibXML 1.61 this module was distributed separately for
maintenance reasons.
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

View File

@@ -0,0 +1,72 @@
=head1 NAME
XML::LibXML::XPathExpression - interface to libxml2 pre-compiled XPath expressions
=head1 SYNOPSIS
use XML::LibXML;
my $compiled_xpath = XML::LibXML::XPathExpression->new('//foo[@bar="baz"][position()<4]');
# interface from XML::LibXML::Node
my $result = $node->find($compiled_xpath);
my @nodes = $node->findnodes($compiled_xpath);
my $value = $node->findvalue($compiled_xpath);
# interface from XML::LibXML::XPathContext
my $result = $xpc->find($compiled_xpath,$node);
my @nodes = $xpc->findnodes($compiled_xpath,$node);
my $value = $xpc->findvalue($compiled_xpath,$node);
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
=head1 DESCRIPTION
This is a perl interface to libxml2's pre-compiled XPath expressions.
Pre-compiling an XPath expression can give some performance benefit if the
same XPath query is evaluated many times. C<<<<<< XML::LibXML::XPathExpression >>>>>> objects can be passed to all C<<<<<< find... >>>>>> functions in C<<<<<< XML::LibXML >>>>>> that expect an XPath expression.
=over 4
=item new()
$compiled = XML::LibXML::XPathExpression->new( xpath_string );
The constructor takes an XPath 1.0 expression as a string and returns an object
representing the pre-compiled expressions (the actual data structure is
internal to libxml2).
=back
=head1 AUTHORS
Matt Sergeant,
Christian Glahn,
Petr Pajas
=head1 VERSION
2.0206
=head1 COPYRIGHT
2001-2007, AxKit.com Ltd.
2002-2006, Christian Glahn.
2006-2009, Petr Pajas.
=cut
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.