Initial Commit
This commit is contained in:
736
database/perl/vendor/lib/Mojo/DOM/CSS.pm
vendored
Normal file
736
database/perl/vendor/lib/Mojo/DOM/CSS.pm
vendored
Normal file
@@ -0,0 +1,736 @@
|
||||
package Mojo::DOM::CSS;
|
||||
use Mojo::Base -base;
|
||||
|
||||
use Carp qw(croak);
|
||||
use Mojo::Util qw(dumper trim);
|
||||
|
||||
use constant DEBUG => $ENV{MOJO_DOM_CSS_DEBUG} || 0;
|
||||
|
||||
has 'tree';
|
||||
|
||||
my $ESCAPE_RE = qr/\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}/;
|
||||
my $ATTR_RE = qr/
|
||||
\[
|
||||
((?:$ESCAPE_RE|[\w\-])+) # Key
|
||||
(?:
|
||||
(\W)?= # Operator
|
||||
(?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?)) # Value
|
||||
(?:\s+(?:(i|I)|s|S))? # Case-sensitivity
|
||||
)?
|
||||
\]
|
||||
/x;
|
||||
|
||||
sub matches {
|
||||
my $tree = shift->tree;
|
||||
return $tree->[0] ne 'tag' ? undef : _match(_compile(@_), $tree, $tree, _root($tree));
|
||||
}
|
||||
|
||||
sub select { _select(0, shift->tree, _compile(@_)) }
|
||||
sub select_one { _select(1, shift->tree, _compile(@_)) }
|
||||
|
||||
sub _absolutize { [map { _is_scoped($_) ? $_ : [[['pc', 'scope']], ' ', @$_] } @{shift()}] }
|
||||
|
||||
sub _ancestor {
|
||||
my ($selectors, $current, $tree, $scope, $one, $pos) = @_;
|
||||
|
||||
while ($current ne $scope && $current->[0] ne 'root' && ($current = $current->[3])) {
|
||||
return 1 if _combinator($selectors, $current, $tree, $scope, $pos);
|
||||
return undef if $current eq $scope;
|
||||
last if $one;
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _attr {
|
||||
my ($name_re, $value_re, $current) = @_;
|
||||
|
||||
my $attrs = $current->[2];
|
||||
for my $name (keys %$attrs) {
|
||||
my $value = $attrs->{$name};
|
||||
next if $name !~ $name_re || (!defined $value && defined $value_re);
|
||||
return 1 if !(defined $value && defined $value_re) || $value =~ $value_re;
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _combinator {
|
||||
my ($selectors, $current, $tree, $scope, $pos) = @_;
|
||||
|
||||
# Selector
|
||||
return undef unless my $c = $selectors->[$pos];
|
||||
if (ref $c) {
|
||||
return undef unless _selector($c, $current, $tree, $scope);
|
||||
return 1 unless $c = $selectors->[++$pos];
|
||||
}
|
||||
|
||||
# ">" (parent only)
|
||||
return _ancestor($selectors, $current, $tree, $scope, 1, ++$pos) if $c eq '>';
|
||||
|
||||
# "~" (preceding siblings)
|
||||
return _sibling($selectors, $current, $tree, $scope, 0, ++$pos) if $c eq '~';
|
||||
|
||||
# "+" (immediately preceding siblings)
|
||||
return _sibling($selectors, $current, $tree, $scope, 1, ++$pos) if $c eq '+';
|
||||
|
||||
# " " (ancestor)
|
||||
return _ancestor($selectors, $current, $tree, $scope, 0, ++$pos);
|
||||
}
|
||||
|
||||
sub _compile {
|
||||
my ($css, %ns) = (trim('' . shift), @_);
|
||||
|
||||
my $group = [[]];
|
||||
while (my $selectors = $group->[-1]) {
|
||||
push @$selectors, [] unless @$selectors && ref $selectors->[-1];
|
||||
my $last = $selectors->[-1];
|
||||
|
||||
# Separator
|
||||
if ($css =~ /\G\s*,\s*/gc) { push @$group, [] }
|
||||
|
||||
# Combinator
|
||||
elsif ($css =~ /\G\s*([ >+~])\s*/gc) {
|
||||
push @$last, ['pc', 'scope'] unless @$last;
|
||||
push @$selectors, $1;
|
||||
}
|
||||
|
||||
# Class or ID
|
||||
elsif ($css =~ /\G([.#])((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
|
||||
my ($name, $op) = $1 eq '.' ? ('class', '~') : ('id', '');
|
||||
push @$last, ['attr', _name($name), _value($op, $2)];
|
||||
}
|
||||
|
||||
# Attributes
|
||||
elsif ($css =~ /\G$ATTR_RE/gco) { push @$last, ['attr', _name($1), _value($2 // '', $3 // $4 // $5, $6)] }
|
||||
|
||||
# Pseudo-class
|
||||
elsif ($css =~ /\G:([\w\-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?/gcs) {
|
||||
my ($name, $args) = (lc $1, $2);
|
||||
|
||||
# ":is" and ":not" (contains more selectors)
|
||||
$args = _compile($args, %ns) if $name eq 'has' || $name eq 'is' || $name eq 'not';
|
||||
|
||||
# ":nth-*" (with An+B notation)
|
||||
$args = _equation($args) if $name =~ /^nth-/;
|
||||
|
||||
# ":first-*" (rewrite to ":nth-*")
|
||||
($name, $args) = ("nth-$1", [0, 1]) if $name =~ /^first-(.+)$/;
|
||||
|
||||
# ":last-*" (rewrite to ":nth-*")
|
||||
($name, $args) = ("nth-$name", [-1, 1]) if $name =~ /^last-/;
|
||||
|
||||
push @$last, ['pc', $name, $args];
|
||||
}
|
||||
|
||||
# Tag
|
||||
elsif ($css =~ /\G((?:$ESCAPE_RE\s|\\.|[^,.#:[ >~+])+)/gco) {
|
||||
my $alias = (my $name = $1) =~ s/^([^|]*)\|// && $1 ne '*' ? $1 : undef;
|
||||
my $ns = length $alias ? $ns{$alias} // return [['invalid']] : $alias;
|
||||
push @$last, ['tag', $name eq '*' ? undef : _name($name), _unescape($ns)];
|
||||
}
|
||||
|
||||
else { pos $css < length $css ? croak "Unknown CSS selector: $css" : last }
|
||||
}
|
||||
|
||||
warn qq{-- CSS Selector ($css)\n@{[dumper $group]}} if DEBUG;
|
||||
return $group;
|
||||
}
|
||||
|
||||
sub _empty { $_[0][0] eq 'comment' || $_[0][0] eq 'pi' }
|
||||
|
||||
sub _equation {
|
||||
return [0, 0] unless my $equation = shift;
|
||||
|
||||
# "even"
|
||||
return [2, 2] if $equation =~ /^\s*even\s*$/i;
|
||||
|
||||
# "odd"
|
||||
return [2, 1] if $equation =~ /^\s*odd\s*$/i;
|
||||
|
||||
# "4", "+4" or "-4"
|
||||
return [0, $1] if $equation =~ /^\s*((?:\+|-)?\d+)\s*$/;
|
||||
|
||||
# "n", "4n", "+4n", "-4n", "n+1", "4n-1", "+4n-1" (and other variations)
|
||||
return [0, 0] unless $equation =~ /^\s*((?:\+|-)?(?:\d+)?)?n\s*((?:\+|-)\s*\d+)?\s*$/i;
|
||||
return [$1 eq '-' ? -1 : !length $1 ? 1 : $1, join('', split(' ', $2 // 0))];
|
||||
}
|
||||
|
||||
sub _is_scoped {
|
||||
my $selector = shift;
|
||||
|
||||
for my $pc (grep { $_->[0] eq 'pc' } map { ref $_ ? @$_ : () } @$selector) {
|
||||
|
||||
# Selector with ":scope"
|
||||
return 1 if $pc->[1] eq 'scope';
|
||||
|
||||
# Argument of functional pseudo-class with ":scope"
|
||||
return 1 if ($pc->[1] eq 'has' || $pc->[1] eq 'is' || $pc->[1] eq 'not') && grep { _is_scoped($_) } @{$pc->[2]};
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _match {
|
||||
my ($group, $current, $tree, $scope) = @_;
|
||||
_combinator([reverse @$_], $current, $tree, $scope, 0) and return 1 for @$group;
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _name {qr/(?:^|:)\Q@{[_unescape(shift)]}\E$/}
|
||||
|
||||
sub _namespace {
|
||||
my ($ns, $current) = @_;
|
||||
|
||||
my $attr = $current->[1] =~ /^([^:]+):/ ? "xmlns:$1" : 'xmlns';
|
||||
while ($current) {
|
||||
last if $current->[0] eq 'root';
|
||||
return $current->[2]{$attr} eq $ns if exists $current->[2]{$attr};
|
||||
|
||||
$current = $current->[3];
|
||||
}
|
||||
|
||||
# Failing to match yields true if searching for no namespace, false otherwise
|
||||
return !length $ns;
|
||||
}
|
||||
|
||||
sub _pc {
|
||||
my ($class, $args, $current, $tree, $scope) = @_;
|
||||
|
||||
# ":scope" (root can only be a :scope)
|
||||
return $current eq $scope if $class eq 'scope';
|
||||
return undef if $current->[0] eq 'root';
|
||||
|
||||
# ":checked"
|
||||
return exists $current->[2]{checked} || exists $current->[2]{selected} if $class eq 'checked';
|
||||
|
||||
# ":not"
|
||||
return !_match($args, $current, $current, $scope) if $class eq 'not';
|
||||
|
||||
# ":is"
|
||||
return !!_match($args, $current, $current, $scope) if $class eq 'is';
|
||||
|
||||
# ":has"
|
||||
return !!_select(1, $current, $args) if $class eq 'has';
|
||||
|
||||
# ":empty"
|
||||
return !grep { !_empty($_) } @$current[4 .. $#$current] if $class eq 'empty';
|
||||
|
||||
# ":root"
|
||||
return $current->[3] && $current->[3][0] eq 'root' if $class eq 'root';
|
||||
|
||||
# ":any-link", ":link" and ":visited"
|
||||
if ($class eq 'any-link' || $class eq 'link' || $class eq 'visited') {
|
||||
return undef unless $current->[0] eq 'tag' && exists $current->[2]{href};
|
||||
return !!grep { $current->[1] eq $_ } qw(a area link);
|
||||
}
|
||||
|
||||
# ":only-child" or ":only-of-type"
|
||||
if ($class eq 'only-child' || $class eq 'only-of-type') {
|
||||
my $type = $class eq 'only-of-type' ? $current->[1] : undef;
|
||||
$_ ne $current and return undef for @{_siblings($current, $type)};
|
||||
return 1;
|
||||
}
|
||||
|
||||
# ":nth-child", ":nth-last-child", ":nth-of-type" or ":nth-last-of-type"
|
||||
if (ref $args) {
|
||||
my $type = $class eq 'nth-of-type' || $class eq 'nth-last-of-type' ? $current->[1] : undef;
|
||||
my @siblings = @{_siblings($current, $type)};
|
||||
@siblings = reverse @siblings if $class eq 'nth-last-child' || $class eq 'nth-last-of-type';
|
||||
|
||||
for my $i (0 .. $#siblings) {
|
||||
next if (my $result = $args->[0] * $i + $args->[1]) < 1;
|
||||
return undef unless my $sibling = $siblings[$result - 1];
|
||||
return 1 if $sibling eq $current;
|
||||
}
|
||||
}
|
||||
|
||||
# Everything else
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _root {
|
||||
my $tree = shift;
|
||||
$tree = $tree->[3] while $tree->[0] ne 'root';
|
||||
return $tree;
|
||||
}
|
||||
|
||||
sub _select {
|
||||
my ($one, $scope, $group) = @_;
|
||||
|
||||
# Scoped selectors require the whole tree to be searched
|
||||
my $tree = $scope;
|
||||
($group, $tree) = (_absolutize($group), _root($scope)) if grep { _is_scoped($_) } @$group;
|
||||
|
||||
my @results;
|
||||
my @queue = @$tree[($tree->[0] eq 'root' ? 1 : 4) .. $#$tree];
|
||||
while (my $current = shift @queue) {
|
||||
next unless $current->[0] eq 'tag';
|
||||
|
||||
unshift @queue, @$current[4 .. $#$current];
|
||||
next unless _match($group, $current, $tree, $scope);
|
||||
$one ? return $current : push @results, $current;
|
||||
}
|
||||
|
||||
return $one ? undef : \@results;
|
||||
}
|
||||
|
||||
sub _selector {
|
||||
my ($selector, $current, $tree, $scope) = @_;
|
||||
|
||||
# The root might be the scope
|
||||
my $is_tag = $current->[0] eq 'tag';
|
||||
for my $s (@$selector) {
|
||||
my $type = $s->[0];
|
||||
|
||||
# Tag
|
||||
if ($is_tag && $type eq 'tag') {
|
||||
return undef if defined $s->[1] && $current->[1] !~ $s->[1];
|
||||
return undef if defined $s->[2] && !_namespace($s->[2], $current);
|
||||
}
|
||||
|
||||
# Attribute
|
||||
elsif ($is_tag && $type eq 'attr') { return undef unless _attr(@$s[1, 2], $current) }
|
||||
|
||||
# Pseudo-class
|
||||
elsif ($type eq 'pc') { return undef unless _pc(@$s[1, 2], $current, $tree, $scope) }
|
||||
|
||||
# No match
|
||||
else { return undef }
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub _sibling {
|
||||
my ($selectors, $current, $tree, $scope, $immediate, $pos) = @_;
|
||||
|
||||
my $found;
|
||||
for my $sibling (@{_siblings($current)}) {
|
||||
return $found if $sibling eq $current;
|
||||
|
||||
# "+" (immediately preceding sibling)
|
||||
if ($immediate) { $found = _combinator($selectors, $sibling, $tree, $scope, $pos) }
|
||||
|
||||
# "~" (preceding sibling)
|
||||
else { return 1 if _combinator($selectors, $sibling, $tree, $scope, $pos) }
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
sub _siblings {
|
||||
my ($current, $type) = @_;
|
||||
|
||||
my $parent = $current->[3];
|
||||
my @siblings = grep { $_->[0] eq 'tag' } @$parent[($parent->[0] eq 'root' ? 1 : 4) .. $#$parent];
|
||||
@siblings = grep { $type eq $_->[1] } @siblings if defined $type;
|
||||
|
||||
return \@siblings;
|
||||
}
|
||||
|
||||
sub _unescape {
|
||||
return undef unless defined(my $value = shift);
|
||||
|
||||
# Remove escaped newlines
|
||||
$value =~ s/\\\n//g;
|
||||
|
||||
# Unescape Unicode characters
|
||||
$value =~ s/\\([0-9a-fA-F]{1,6})\s?/pack 'U', hex $1/ge;
|
||||
|
||||
# Remove backslash
|
||||
$value =~ s/\\//g;
|
||||
|
||||
return $value;
|
||||
}
|
||||
|
||||
sub _value {
|
||||
my ($op, $value, $insensitive) = @_;
|
||||
return undef unless defined $value;
|
||||
$value = ($insensitive ? '(?i)' : '') . quotemeta _unescape($value);
|
||||
|
||||
# "~=" (word)
|
||||
return qr/(?:^|\s+)$value(?:\s+|$)/ if $op eq '~';
|
||||
|
||||
# "|=" (hyphen-separated)
|
||||
return qr/^$value(?:-|$)/ if $op eq '|';
|
||||
|
||||
# "*=" (contains)
|
||||
return qr/$value/ if $op eq '*';
|
||||
|
||||
# "^=" (begins with)
|
||||
return qr/^$value/ if $op eq '^';
|
||||
|
||||
# "$=" (ends with)
|
||||
return qr/$value$/ if $op eq '$';
|
||||
|
||||
# Everything else
|
||||
return qr/^$value$/;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Mojo::DOM::CSS - CSS selector engine
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Mojo::DOM::CSS;
|
||||
|
||||
# Select elements from DOM tree
|
||||
my $css = Mojo::DOM::CSS->new(tree => $tree);
|
||||
my $elements = $css->select('h1, h2, h3');
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
L<Mojo::DOM::CSS> is the CSS selector engine used by L<Mojo::DOM>, based on the L<HTML Living
|
||||
Standard|https://html.spec.whatwg.org> and L<Selectors Level 3|https://www.w3.org/TR/css3-selectors/>.
|
||||
|
||||
=head1 SELECTORS
|
||||
|
||||
All CSS selectors that make sense for a standalone parser are supported.
|
||||
|
||||
=head2 *
|
||||
|
||||
Any element.
|
||||
|
||||
my $all = $css->select('*');
|
||||
|
||||
=head2 E
|
||||
|
||||
An element of type C<E>.
|
||||
|
||||
my $title = $css->select('title');
|
||||
|
||||
=head2 E[foo]
|
||||
|
||||
An C<E> element with a C<foo> attribute.
|
||||
|
||||
my $links = $css->select('a[href]');
|
||||
|
||||
=head2 E[foo="bar"]
|
||||
|
||||
An C<E> element whose C<foo> attribute value is exactly equal to C<bar>.
|
||||
|
||||
my $case_sensitive = $css->select('input[type="hidden"]');
|
||||
my $case_sensitive = $css->select('input[type=hidden]');
|
||||
|
||||
=head2 E[foo="bar" i]
|
||||
|
||||
An C<E> element whose C<foo> attribute value is exactly equal to any (ASCII-range) case-permutation of C<bar>. Note
|
||||
that this selector is B<EXPERIMENTAL> and might change without warning!
|
||||
|
||||
my $case_insensitive = $css->select('input[type="hidden" i]');
|
||||
my $case_insensitive = $css->select('input[type=hidden i]');
|
||||
my $case_insensitive = $css->select('input[class~="foo" i]');
|
||||
|
||||
This selector is part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
|
||||
=head2 E[foo="bar" s]
|
||||
|
||||
An C<E> element whose C<foo> attribute value is exactly and case-sensitively equal to C<bar>. Note that this selector
|
||||
is B<EXPERIMENTAL> and might change without warning!
|
||||
|
||||
my $case_sensitive = $css->select('input[type="hidden" s]');
|
||||
|
||||
This selector is part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
|
||||
=head2 E[foo~="bar"]
|
||||
|
||||
An C<E> element whose C<foo> attribute value is a list of whitespace-separated values, one of which is exactly equal to
|
||||
C<bar>.
|
||||
|
||||
my $foo = $css->select('input[class~="foo"]');
|
||||
my $foo = $css->select('input[class~=foo]');
|
||||
|
||||
=head2 E[foo^="bar"]
|
||||
|
||||
An C<E> element whose C<foo> attribute value begins exactly with the string C<bar>.
|
||||
|
||||
my $begins_with = $css->select('input[name^="f"]');
|
||||
my $begins_with = $css->select('input[name^=f]');
|
||||
|
||||
=head2 E[foo$="bar"]
|
||||
|
||||
An C<E> element whose C<foo> attribute value ends exactly with the string C<bar>.
|
||||
|
||||
my $ends_with = $css->select('input[name$="o"]');
|
||||
my $ends_with = $css->select('input[name$=o]');
|
||||
|
||||
=head2 E[foo*="bar"]
|
||||
|
||||
An C<E> element whose C<foo> attribute value contains the substring C<bar>.
|
||||
|
||||
my $contains = $css->select('input[name*="fo"]');
|
||||
my $contains = $css->select('input[name*=fo]');
|
||||
|
||||
=head2 E[foo|="en"]
|
||||
|
||||
An C<E> element whose C<foo> attribute has a hyphen-separated list of values beginning (from the left) with C<en>.
|
||||
|
||||
my $english = $css->select('link[hreflang|=en]');
|
||||
|
||||
=head2 E:root
|
||||
|
||||
An C<E> element, root of the document.
|
||||
|
||||
my $root = $css->select(':root');
|
||||
|
||||
=head2 E:nth-child(n)
|
||||
|
||||
An C<E> element, the C<n-th> child of its parent.
|
||||
|
||||
my $third = $css->select('div:nth-child(3)');
|
||||
my $odd = $css->select('div:nth-child(odd)');
|
||||
my $even = $css->select('div:nth-child(even)');
|
||||
my $top3 = $css->select('div:nth-child(-n+3)');
|
||||
|
||||
=head2 E:nth-last-child(n)
|
||||
|
||||
An C<E> element, the C<n-th> child of its parent, counting from the last one.
|
||||
|
||||
my $third = $css->select('div:nth-last-child(3)');
|
||||
my $odd = $css->select('div:nth-last-child(odd)');
|
||||
my $even = $css->select('div:nth-last-child(even)');
|
||||
my $bottom3 = $css->select('div:nth-last-child(-n+3)');
|
||||
|
||||
=head2 E:nth-of-type(n)
|
||||
|
||||
An C<E> element, the C<n-th> sibling of its type.
|
||||
|
||||
my $third = $css->select('div:nth-of-type(3)');
|
||||
my $odd = $css->select('div:nth-of-type(odd)');
|
||||
my $even = $css->select('div:nth-of-type(even)');
|
||||
my $top3 = $css->select('div:nth-of-type(-n+3)');
|
||||
|
||||
=head2 E:nth-last-of-type(n)
|
||||
|
||||
An C<E> element, the C<n-th> sibling of its type, counting from the last one.
|
||||
|
||||
my $third = $css->select('div:nth-last-of-type(3)');
|
||||
my $odd = $css->select('div:nth-last-of-type(odd)');
|
||||
my $even = $css->select('div:nth-last-of-type(even)');
|
||||
my $bottom3 = $css->select('div:nth-last-of-type(-n+3)');
|
||||
|
||||
=head2 E:first-child
|
||||
|
||||
An C<E> element, first child of its parent.
|
||||
|
||||
my $first = $css->select('div p:first-child');
|
||||
|
||||
=head2 E:last-child
|
||||
|
||||
An C<E> element, last child of its parent.
|
||||
|
||||
my $last = $css->select('div p:last-child');
|
||||
|
||||
=head2 E:first-of-type
|
||||
|
||||
An C<E> element, first sibling of its type.
|
||||
|
||||
my $first = $css->select('div p:first-of-type');
|
||||
|
||||
=head2 E:last-of-type
|
||||
|
||||
An C<E> element, last sibling of its type.
|
||||
|
||||
my $last = $css->select('div p:last-of-type');
|
||||
|
||||
=head2 E:only-child
|
||||
|
||||
An C<E> element, only child of its parent.
|
||||
|
||||
my $lonely = $css->select('div p:only-child');
|
||||
|
||||
=head2 E:only-of-type
|
||||
|
||||
An C<E> element, only sibling of its type.
|
||||
|
||||
my $lonely = $css->select('div p:only-of-type');
|
||||
|
||||
=head2 E:empty
|
||||
|
||||
An C<E> element that has no children (including text nodes).
|
||||
|
||||
my $empty = $css->select(':empty');
|
||||
|
||||
=head2 E:any-link
|
||||
|
||||
Alias for L</"E:link">. Note that this selector is B<EXPERIMENTAL> and might change without warning! This selector is
|
||||
part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
|
||||
=head2 E:link
|
||||
|
||||
An C<E> element being the source anchor of a hyperlink of which the target is not yet visited (C<:link>) or already
|
||||
visited (C<:visited>). Note that L<Mojo::DOM::CSS> is not stateful, therefore C<:any-link>, C<:link> and C<:visited>
|
||||
yield exactly the same results.
|
||||
|
||||
my $links = $css->select(':any-link');
|
||||
my $links = $css->select(':link');
|
||||
my $links = $css->select(':visited');
|
||||
|
||||
=head2 E:visited
|
||||
|
||||
Alias for L</"E:link">.
|
||||
|
||||
=head2 E:scope
|
||||
|
||||
An C<E> element being a designated reference element. Note that this selector is B<EXPERIMENTAL> and might change
|
||||
without warning!
|
||||
|
||||
my $scoped = $css->select('a:not(:scope > a)');
|
||||
my $scoped = $css->select('div :scope p');
|
||||
my $scoped = $css->select('~ p');
|
||||
|
||||
This selector is part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
|
||||
=head2 E:checked
|
||||
|
||||
A user interface element C<E> which is checked (for instance a radio-button or checkbox).
|
||||
|
||||
my $input = $css->select(':checked');
|
||||
|
||||
=head2 E.warning
|
||||
|
||||
An C<E> element whose class is "warning".
|
||||
|
||||
my $warning = $css->select('div.warning');
|
||||
|
||||
=head2 E#myid
|
||||
|
||||
An C<E> element with C<ID> equal to "myid".
|
||||
|
||||
my $foo = $css->select('div#foo');
|
||||
|
||||
=head2 E:not(s1, s2)
|
||||
|
||||
An C<E> element that does not match either compound selector C<s1> or compound selector C<s2>. Note that support for
|
||||
compound selectors is B<EXPERIMENTAL> and might change without warning!
|
||||
|
||||
my $others = $css->select('div p:not(:first-child, :last-child)');
|
||||
|
||||
Support for compound selectors was added as part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is
|
||||
still a work in progress.
|
||||
|
||||
=head2 E:is(s1, s2)
|
||||
|
||||
An C<E> element that matches compound selector C<s1> and/or compound selector C<s2>. Note that this selector is
|
||||
B<EXPERIMENTAL> and might change without warning!
|
||||
|
||||
my $headers = $css->select(':is(section, article, aside, nav) h1');
|
||||
|
||||
This selector is part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
|
||||
=head2 E:has(rs1, rs2)
|
||||
|
||||
An C<E> element, if either of the relative selectors C<rs1> or C<rs2>, when evaluated with C<E> as the :scope elements,
|
||||
match an element. Note that this selector is B<EXPERIMENTAL> and might change without warning!
|
||||
|
||||
my $link = $css->select('a:has(> img)');
|
||||
|
||||
This selector is part of L<Selectors Level 4|https://dev.w3.org/csswg/selectors-4>, which is still a work in progress.
|
||||
Also be aware that this feature is currently marked C<at-risk>, so there is a high chance that it will get removed
|
||||
completely.
|
||||
|
||||
=head2 A|E
|
||||
|
||||
An C<E> element that belongs to the namespace alias C<A> from L<CSS Namespaces Module Level
|
||||
3|https://www.w3.org/TR/css-namespaces-3/>. Key/value pairs passed to selector methods are used to declare namespace
|
||||
aliases.
|
||||
|
||||
my $elem = $css->select('lq|elem', lq => 'http://example.com/q-markup');
|
||||
|
||||
Using an empty alias searches for an element that belongs to no namespace.
|
||||
|
||||
my $div = $c->select('|div');
|
||||
|
||||
=head2 E F
|
||||
|
||||
An C<F> element descendant of an C<E> element.
|
||||
|
||||
my $headlines = $css->select('div h1');
|
||||
|
||||
=head2 E E<gt> F
|
||||
|
||||
An C<F> element child of an C<E> element.
|
||||
|
||||
my $headlines = $css->select('html > body > div > h1');
|
||||
|
||||
=head2 E + F
|
||||
|
||||
An C<F> element immediately preceded by an C<E> element.
|
||||
|
||||
my $second = $css->select('h1 + h2');
|
||||
|
||||
=head2 E ~ F
|
||||
|
||||
An C<F> element preceded by an C<E> element.
|
||||
|
||||
my $second = $css->select('h1 ~ h2');
|
||||
|
||||
=head2 E, F, G
|
||||
|
||||
Elements of type C<E>, C<F> and C<G>.
|
||||
|
||||
my $headlines = $css->select('h1, h2, h3');
|
||||
|
||||
=head2 E[foo=bar][bar=baz]
|
||||
|
||||
An C<E> element whose attributes match all following attribute selectors.
|
||||
|
||||
my $links = $css->select('a[foo^=b][foo$=ar]');
|
||||
|
||||
=head1 ATTRIBUTES
|
||||
|
||||
L<Mojo::DOM::CSS> implements the following attributes.
|
||||
|
||||
=head2 tree
|
||||
|
||||
my $tree = $css->tree;
|
||||
$css = $css->tree(['root']);
|
||||
|
||||
Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
L<Mojo::DOM::CSS> inherits all methods from L<Mojo::Base> and implements the following new ones.
|
||||
|
||||
=head2 matches
|
||||
|
||||
my $bool = $css->matches('head > title');
|
||||
my $bool = $css->matches('svg|line', svg => 'http://www.w3.org/2000/svg');
|
||||
|
||||
Check if first node in L</"tree"> matches the CSS selector. Trailing key/value pairs can be used to declare xml
|
||||
namespace aliases.
|
||||
|
||||
=head2 select
|
||||
|
||||
my $results = $css->select('head > title');
|
||||
my $results = $css->select('svg|line', svg => 'http://www.w3.org/2000/svg');
|
||||
|
||||
Run CSS selector against L</"tree">. Trailing key/value pairs can be used to declare xml namespace aliases.
|
||||
|
||||
=head2 select_one
|
||||
|
||||
my $result = $css->select_one('head > title');
|
||||
my $result =
|
||||
$css->select_one('svg|line', svg => 'http://www.w3.org/2000/svg');
|
||||
|
||||
Run CSS selector against L</"tree"> and stop as soon as the first node matched. Trailing key/value pairs can be used to
|
||||
declare xml namespace aliases.
|
||||
|
||||
=head1 DEBUGGING
|
||||
|
||||
You can set the C<MOJO_DOM_CSS_DEBUG> environment variable to get some advanced diagnostics information printed to
|
||||
C<STDERR>.
|
||||
|
||||
MOJO_DOM_CSS_DEBUG=1
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
|
||||
|
||||
=cut
|
||||
370
database/perl/vendor/lib/Mojo/DOM/HTML.pm
vendored
Normal file
370
database/perl/vendor/lib/Mojo/DOM/HTML.pm
vendored
Normal file
@@ -0,0 +1,370 @@
|
||||
package Mojo::DOM::HTML;
|
||||
use Mojo::Base -base;
|
||||
|
||||
use Exporter qw(import);
|
||||
use Mojo::Util qw(html_attr_unescape html_unescape xml_escape);
|
||||
use Scalar::Util qw(weaken);
|
||||
|
||||
our @EXPORT_OK = ('tag_to_html');
|
||||
|
||||
has tree => sub { ['root'] };
|
||||
has 'xml';
|
||||
|
||||
my $ATTR_RE = qr/
|
||||
([^<>=\s\/]+|\/) # Key
|
||||
(?:
|
||||
\s*=\s*
|
||||
(?s:(["'])(.*?)\g{-2}|([^>\s]*)) # Value
|
||||
)?
|
||||
\s*
|
||||
/x;
|
||||
my $TOKEN_RE = qr/
|
||||
([^<]+)? # Text
|
||||
(?:
|
||||
<(?:
|
||||
!(?:
|
||||
DOCTYPE(
|
||||
\s+\w+ # Doctype
|
||||
(?:(?:\s+\w+)?(?:\s+(?:"[^"]*"|'[^']*'))+)? # External ID
|
||||
(?:\s+\[.+?\])? # Int Subset
|
||||
\s*)
|
||||
|
|
||||
--(.*?)--\s* # Comment
|
||||
|
|
||||
\[CDATA\[(.*?)\]\] # CDATA
|
||||
)
|
||||
|
|
||||
\?(.*?)\? # Processing Instruction
|
||||
|
|
||||
\s*([^<>\s]+\s*(?:(?:$ATTR_RE){0,32766})*+) # Tag
|
||||
)>
|
||||
|
|
||||
(<) # Runaway "<"
|
||||
)??
|
||||
/xis;
|
||||
|
||||
# HTML elements that only contain raw text
|
||||
my %RAW = map { $_ => 1 } qw(script style);
|
||||
|
||||
# HTML elements that only contain raw text and entities
|
||||
my %RCDATA = map { $_ => 1 } qw(title textarea);
|
||||
|
||||
# HTML elements with optional end tags
|
||||
my %END = (body => 'head', optgroup => 'optgroup', option => 'option');
|
||||
|
||||
# HTML elements that break paragraphs
|
||||
map { $END{$_} = 'p' } (
|
||||
qw(address article aside blockquote details dialog div dl fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6),
|
||||
qw(header hgroup hr main menu nav ol p pre section table ul)
|
||||
);
|
||||
|
||||
# HTML table elements with optional end tags
|
||||
my %TABLE = map { $_ => 1 } qw(colgroup tbody td tfoot th thead tr);
|
||||
|
||||
# HTML elements with optional end tags and scoping rules
|
||||
my %CLOSE = (li => [{li => 1}, {ul => 1, ol => 1}], tr => [{tr => 1}, {table => 1}]);
|
||||
$CLOSE{$_} = [\%TABLE, {table => 1}] for qw(colgroup tbody tfoot thead);
|
||||
$CLOSE{$_} = [{dd => 1, dt => 1}, {dl => 1}] for qw(dd dt);
|
||||
$CLOSE{$_} = [{rp => 1, rt => 1}, {ruby => 1}] for qw(rp rt);
|
||||
$CLOSE{$_} = [{th => 1, td => 1}, {table => 1}] for qw(td th);
|
||||
|
||||
# HTML parent elements that signal no more content when closed, but that are also phrasing content
|
||||
my %NO_MORE_CONTENT = (ruby => [qw(rt rp)], select => [qw(option optgroup)]);
|
||||
|
||||
# HTML elements without end tags
|
||||
my %EMPTY = map { $_ => 1 } qw(area base br col embed hr img input keygen link menuitem meta param source track wbr);
|
||||
|
||||
# HTML elements categorized as phrasing content (and obsolete inline elements)
|
||||
my @PHRASING = (
|
||||
qw(a abbr area audio b bdi bdo br button canvas cite code data datalist del dfn em embed i iframe img input ins kbd),
|
||||
qw(keygen label link map mark math meta meter noscript object output picture progress q ruby s samp script select),
|
||||
qw(slot small span strong sub sup svg template textarea time u var video wbr)
|
||||
);
|
||||
my @OBSOLETE = qw(acronym applet basefont big font strike tt);
|
||||
my %PHRASING = map { $_ => 1 } @OBSOLETE, @PHRASING;
|
||||
|
||||
# HTML elements that don't get their self-closing flag acknowledged
|
||||
my %BLOCK = map { $_ => 1 } (
|
||||
qw(a address applet article aside b big blockquote body button caption center code col colgroup dd details dialog),
|
||||
qw(dir div dl dt em fieldset figcaption figure font footer form frameset h1 h2 h3 h4 h5 h6 head header hgroup html),
|
||||
qw(i iframe li listing main marquee menu nav nobr noembed noframes noscript object ol optgroup option p plaintext),
|
||||
qw(pre rp rt s script section select small strike strong style summary table tbody td template textarea tfoot th),
|
||||
qw(thead title tr tt u ul xmp)
|
||||
);
|
||||
|
||||
sub parse {
|
||||
my ($self, $html) = (shift, "$_[0]");
|
||||
|
||||
my $xml = $self->xml;
|
||||
my $current = my $tree = ['root'];
|
||||
while ($html =~ /\G$TOKEN_RE/gcso) {
|
||||
my ($text, $doctype, $comment, $cdata, $pi, $tag, $runaway) = ($1, $2, $3, $4, $5, $6, $11);
|
||||
|
||||
# Text (and runaway "<")
|
||||
$text .= '<' if defined $runaway;
|
||||
_node($current, 'text', html_unescape $text) if defined $text;
|
||||
|
||||
# Tag
|
||||
if (defined $tag) {
|
||||
|
||||
# End
|
||||
if ($tag =~ /^\/\s*(\S+)/) {
|
||||
my $end = $xml ? $1 : lc $1;
|
||||
|
||||
# No more content
|
||||
if (!$xml && (my $tags = $NO_MORE_CONTENT{$end})) { _end($_, $xml, \$current) for @$tags }
|
||||
|
||||
_end($xml ? $1 : lc $1, $xml, \$current);
|
||||
}
|
||||
|
||||
# Start
|
||||
elsif ($tag =~ m!^([^\s/]+)([\s\S]*)!) {
|
||||
my ($start, $attr) = ($xml ? $1 : lc $1, $2);
|
||||
|
||||
# Attributes
|
||||
my (%attrs, $closing);
|
||||
while ($attr =~ /$ATTR_RE/go) {
|
||||
my ($key, $value) = ($xml ? $1 : lc $1, $3 // $4);
|
||||
|
||||
# Empty tag
|
||||
++$closing and next if $key eq '/';
|
||||
|
||||
$attrs{$key} = defined $value ? html_attr_unescape $value : $value;
|
||||
}
|
||||
|
||||
# "image" is an alias for "img"
|
||||
$start = 'img' if !$xml && $start eq 'image';
|
||||
_start($start, \%attrs, $xml, \$current);
|
||||
|
||||
# Element without end tag (self-closing)
|
||||
_end($start, $xml, \$current) if !$xml && $EMPTY{$start} || ($xml || !$BLOCK{$start}) && $closing;
|
||||
|
||||
# Raw text elements
|
||||
next if $xml || !$RAW{$start} && !$RCDATA{$start};
|
||||
next unless $html =~ m!\G(.*?)<\s*/\s*\Q$start\E\s*>!gcsi;
|
||||
_node($current, 'raw', $RCDATA{$start} ? html_unescape $1 : $1);
|
||||
_end($start, 0, \$current);
|
||||
}
|
||||
}
|
||||
|
||||
# DOCTYPE
|
||||
elsif (defined $doctype) { _node($current, 'doctype', $doctype) }
|
||||
|
||||
# Comment
|
||||
elsif (defined $comment) { _node($current, 'comment', $comment) }
|
||||
|
||||
# CDATA
|
||||
elsif (defined $cdata) { _node($current, 'cdata', $cdata) }
|
||||
|
||||
# Processing instruction (try to detect XML)
|
||||
elsif (defined $pi) {
|
||||
$self->xml($xml = 1) if !exists $self->{xml} && $pi =~ /xml/i;
|
||||
_node($current, 'pi', $pi);
|
||||
}
|
||||
}
|
||||
|
||||
return $self->tree($tree);
|
||||
}
|
||||
|
||||
sub render { _render($_[0]->tree, $_[0]->xml) }
|
||||
|
||||
sub tag { shift->tree(['root', _tag(@_)]) }
|
||||
|
||||
sub tag_to_html { _render(_tag(@_), undef) }
|
||||
|
||||
sub _end {
|
||||
my ($end, $xml, $current) = @_;
|
||||
|
||||
# Search stack for start tag
|
||||
my $next = $$current;
|
||||
do {
|
||||
|
||||
# Ignore useless end tag
|
||||
return if $next->[0] eq 'root';
|
||||
|
||||
# Right tag
|
||||
return $$current = $next->[3] if $next->[1] eq $end;
|
||||
|
||||
# Phrasing content can only cross phrasing content
|
||||
return if !$xml && $PHRASING{$end} && !$PHRASING{$next->[1]};
|
||||
|
||||
} while $next = $next->[3];
|
||||
}
|
||||
|
||||
sub _node {
|
||||
my ($current, $type, $content) = @_;
|
||||
push @$current, my $new = [$type, $content, $current];
|
||||
weaken $new->[2];
|
||||
}
|
||||
|
||||
sub _render {
|
||||
my ($tree, $xml) = @_;
|
||||
|
||||
# Tag
|
||||
my $type = $tree->[0];
|
||||
if ($type eq 'tag') {
|
||||
|
||||
# Start tag
|
||||
my $tag = $tree->[1];
|
||||
my $result = "<$tag";
|
||||
|
||||
# Attributes
|
||||
for my $key (sort keys %{$tree->[2]}) {
|
||||
my $value = $tree->[2]{$key};
|
||||
$result .= $xml ? qq{ $key="$key"} : " $key" and next unless defined $value;
|
||||
$result .= qq{ $key="} . xml_escape($value) . '"';
|
||||
}
|
||||
|
||||
# No children
|
||||
return $xml ? "$result />" : $EMPTY{$tag} ? "$result>" : "$result></$tag>" unless $tree->[4];
|
||||
|
||||
# Children
|
||||
no warnings 'recursion';
|
||||
$result .= '>' . join '', map { _render($_, $xml) } @$tree[4 .. $#$tree];
|
||||
|
||||
# End tag
|
||||
return "$result</$tag>";
|
||||
}
|
||||
|
||||
# Text (escaped)
|
||||
return xml_escape $tree->[1] if $type eq 'text';
|
||||
|
||||
# Raw text
|
||||
return $tree->[1] if $type eq 'raw';
|
||||
|
||||
# Root
|
||||
return join '', map { _render($_, $xml) } @$tree[1 .. $#$tree] if $type eq 'root';
|
||||
|
||||
# DOCTYPE
|
||||
return '<!DOCTYPE' . $tree->[1] . '>' if $type eq 'doctype';
|
||||
|
||||
# Comment
|
||||
return '<!--' . $tree->[1] . '-->' if $type eq 'comment';
|
||||
|
||||
# CDATA
|
||||
return '<![CDATA[' . $tree->[1] . ']]>' if $type eq 'cdata';
|
||||
|
||||
# Processing instruction
|
||||
return '<?' . $tree->[1] . '?>' if $type eq 'pi';
|
||||
|
||||
# Everything else
|
||||
return '';
|
||||
}
|
||||
|
||||
sub _start {
|
||||
my ($start, $attrs, $xml, $current) = @_;
|
||||
|
||||
# Autoclose optional HTML elements
|
||||
if (!$xml && $$current->[0] ne 'root') {
|
||||
if (my $end = $END{$start}) { _end($end, 0, $current) }
|
||||
|
||||
elsif (my $close = $CLOSE{$start}) {
|
||||
my ($allowed, $scope) = @$close;
|
||||
|
||||
# Close allowed parent elements in scope
|
||||
my $parent = $$current;
|
||||
while ($parent->[0] ne 'root' && !$scope->{$parent->[1]}) {
|
||||
_end($parent->[1], 0, $current) if $allowed->{$parent->[1]};
|
||||
$parent = $parent->[3];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# New tag
|
||||
push @$$current, my $new = ['tag', $start, $attrs, $$current];
|
||||
weaken $new->[3];
|
||||
$$current = $new;
|
||||
}
|
||||
|
||||
sub _tag {
|
||||
my $tree = ['tag', shift, undef, undef];
|
||||
|
||||
# Content
|
||||
push @$tree, ref $_[-1] eq 'CODE' ? ['raw', pop->()] : ['text', pop] if @_ % 2;
|
||||
|
||||
# Attributes
|
||||
my $attrs = $tree->[2] = {@_};
|
||||
return $tree unless exists $attrs->{data} && ref $attrs->{data} eq 'HASH';
|
||||
my $data = delete $attrs->{data};
|
||||
@$attrs{map { y/_/-/; lc "data-$_" } keys %$data} = values %$data;
|
||||
return $tree;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
=encoding utf8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Mojo::DOM::HTML - HTML/XML engine
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Mojo::DOM::HTML;
|
||||
|
||||
# Turn HTML into DOM tree
|
||||
my $html = Mojo::DOM::HTML->new;
|
||||
$html->parse('<div><p id="a">Test</p><p id="b">123</p></div>');
|
||||
my $tree = $html->tree;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
L<Mojo::DOM::HTML> is the HTML/XML engine used by L<Mojo::DOM>, based on the L<HTML Living
|
||||
Standard|https://html.spec.whatwg.org> and the L<Extensible Markup Language (XML) 1.0|https://www.w3.org/TR/xml/>.
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
L<Mojo::DOM::HTML> implements the following functions, which can be imported individually.
|
||||
|
||||
=head2 tag_to_html
|
||||
|
||||
my $str = tag_to_html 'div', id => 'foo', 'safe content';
|
||||
|
||||
Generate HTML/XML tag and render it right away. This is a significantly faster alternative to L</"tag"> for template
|
||||
systems that have to generate a lot of tags.
|
||||
|
||||
=head1 ATTRIBUTES
|
||||
|
||||
L<Mojo::DOM::HTML> implements the following attributes.
|
||||
|
||||
=head2 tree
|
||||
|
||||
my $tree = $html->tree;
|
||||
$html = $html->tree(['root']);
|
||||
|
||||
Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.
|
||||
|
||||
=head2 xml
|
||||
|
||||
my $bool = $html->xml;
|
||||
$html = $html->xml($bool);
|
||||
|
||||
Disable HTML semantics in parser and activate case-sensitivity, defaults to auto-detection based on XML declarations.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
L<Mojo::DOM::HTML> inherits all methods from L<Mojo::Base> and implements the following new ones.
|
||||
|
||||
=head2 parse
|
||||
|
||||
$html = $html->parse('<foo bar="baz">I ♥ Mojolicious!</foo>');
|
||||
|
||||
Parse HTML/XML fragment.
|
||||
|
||||
=head2 render
|
||||
|
||||
my $str = $html->render;
|
||||
|
||||
Render DOM to HTML/XML.
|
||||
|
||||
=head2 tag
|
||||
|
||||
$html = $html->tag('div', id => 'foo', 'safe content');
|
||||
|
||||
Generate HTML/XML tag.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
|
||||
|
||||
=cut
|
||||
Reference in New Issue
Block a user