Initial Commit
This commit is contained in:
108
database/perl/lib/Regexp/Trie.pm
Normal file
108
database/perl/lib/Regexp/Trie.pm
Normal file
@@ -0,0 +1,108 @@
|
||||
#
|
||||
# $Id: Trie.pm,v 0.2 2006/04/27 05:24:40 dankogai Exp dankogai $
|
||||
#
|
||||
|
||||
package Regexp::Trie;
|
||||
use 5.008001;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
our $VERSION = sprintf "%d.%02d", q$Revision: 0.2 $ =~ /(\d+)/g;
|
||||
|
||||
# use overload q("") => sub { shift->regexp };
|
||||
|
||||
sub new{ bless {} => shift }
|
||||
sub add{
|
||||
my $self = shift;
|
||||
my $str = shift;
|
||||
my $ref = $self;
|
||||
for my $char (split //, $str){
|
||||
$ref->{$char} ||= {};
|
||||
$ref = $ref->{$char};
|
||||
}
|
||||
$ref->{''} = 1; # { '' => 1 } as terminator
|
||||
$self;
|
||||
}
|
||||
sub _regexp{
|
||||
my $self = shift;
|
||||
return if $self->{''} and scalar keys %$self == 1; # terminator
|
||||
my (@alt, @cc);
|
||||
my $q = 0;
|
||||
for my $char (sort keys %$self){
|
||||
my $qchar = quotemeta $char;
|
||||
if (ref $self->{$char}){
|
||||
if (defined (my $recurse = _regexp($self->{$char}))){
|
||||
push @alt, $qchar . $recurse;
|
||||
}else{
|
||||
push @cc, $qchar;
|
||||
}
|
||||
}else{
|
||||
$q = 1;
|
||||
}
|
||||
}
|
||||
my $cconly = !@alt;
|
||||
@cc and push @alt, @cc == 1 ? $cc[0] : '['. join('', @cc). ']';
|
||||
my $result = @alt == 1 ? $alt[0] : '(?:' . join('|', @alt) . ')';
|
||||
$q and $result = $cconly ? "$result?" : "(?:$result)?";
|
||||
return $result;
|
||||
}
|
||||
sub regexp{ my $str = shift->_regexp; qr/$str/ }
|
||||
|
||||
1;
|
||||
__END__
|
||||
# Below is stub documentation for your module. You'd better edit it!
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Regexp::Trie - builds trie-ized regexp
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Regexp::Trie;
|
||||
my $rt = Regexp::Trie->new;
|
||||
for (qw/foobar fooxar foozap fooza/){
|
||||
$rt->add($_);
|
||||
}
|
||||
print $rt->regexp, "\n" # (?-xism:foo(?:bar|xar|zap?))
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module is a faster but simpler version of L<Regexp::Assemble> or
|
||||
L<Regexp::Optimizer>. It builds a trie-ized regexp as above.
|
||||
|
||||
This module is faster than L<Regexp::Assemble> but you can only add
|
||||
literals. C<a+b> is treated as C<a\+b>, not "more than one a's
|
||||
followed by b".
|
||||
|
||||
I wrote this module because I needed something faster than
|
||||
L<Regexp::Assemble> and L<Regexp::Optimizer>. If you need more minute
|
||||
control, use those instead.
|
||||
|
||||
=head1 TIPS
|
||||
|
||||
See t/dict2rx.pl to find how to convert a big dictionary into a single
|
||||
regexp that can be later loaded as:
|
||||
|
||||
my $rx = do 'dict.rx';
|
||||
|
||||
=head2 EXPORT
|
||||
|
||||
None.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Regexp::Optimizer>, L<Regexp::Assemble>, L<Regex::PreSuf>
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Dan Kogai, E<lt>dankogai@dan.co.jpE<gt>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (C) 2006 by Dan Kogai
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself, either Perl version 5.8.8 or,
|
||||
at your option, any later version of Perl 5 you may have available.
|
||||
|
||||
=cut
|
||||
Reference in New Issue
Block a user