Initial Commit

This commit is contained in:
Riley Schneider
2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions

3590
database/perl/vendor/lib/WWW/Mechanize.pm vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,103 @@
# PODNAME: WWW::Mechanize::Cookbook
# ABSTRACT: Recipes for using WWW::Mechanize
__END__
=pod
=encoding UTF-8
=head1 NAME
WWW::Mechanize::Cookbook - Recipes for using WWW::Mechanize
=head1 VERSION
version 2.03
=head1 INTRODUCTION
First, please note that many of these are possible just using
L<LWP::UserAgent>. Since C<WWW::Mechanize> is a subclass of
L<LWP::UserAgent>, whatever works on C<LWP::UserAgent> should work
on C<WWW::Mechanize>. See the L<lwpcook> man page included with
the L<libwww-perl> distribution.
=head1 BASICS
=head2 Launch the WWW::Mechanize browser
use WWW::Mechanize;
my $mech = WWW::Mechanize->new( autocheck => 1 );
The C<< autocheck => 1 >> tells Mechanize to die if any IO fails,
so you don't have to manually check. It's easier that way. If you
want to do your own error checking, leave it out.
=head2 Fetch a page
$mech->get( "http://search.cpan.org" );
print $mech->content;
C<< $mech->content >> contains the raw HTML from the web page. It
is not parsed or handled in any way, at least through the C<content>
method.
=head2 Fetch a page into a file
Sometimes you want to dump your results directly into a file. For
example, there's no reason to read a JPEG into memory if you're
only going to write it out immediately. This can also help with
memory issues on large files.
$mech->get( "http://www.cpan.org/src/stable.tar.gz",
":content_file" => "stable.tar.gz" );
=head2 Fetch a password-protected page
Generally, just call C<credentials> before fetching the page.
$mech->credentials( 'admin' => 'password' );
$mech->get( 'http://10.11.12.13/password.html' );
print $mech->content();
=head1 LINKS
=head2 Find all image links
Find all links that point to a JPEG, GIF or PNG.
my @links = $mech->find_all_links(
tag => "a", url_regex => qr/\.(jpe?g|gif|png)$/i );
=head2 Find all download links
Find all links that have the word "download" in them.
my @links = $mech->find_all_links(
tag => "a", text_regex => qr/\bdownload\b/i );
=head1 ADVANCED
=head2 See what will be sent without actually sending anything
$mech->add_handler("request_send", sub { shift->dump; exit; });
$mech->get("http://www.example.com");
=head1 SEE ALSO
L<WWW::Mechanize>
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,583 @@
# PODNAME: WWW::Mechanize::Examples
# ABSTRACT: Sample programs that use WWW::Mechanize
__END__
=pod
=encoding UTF-8
=head1 NAME
WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
=head1 VERSION
version 2.03
=head1 SYNOPSIS
Plenty of people have learned WWW::Mechanize, and now, you can too!
Following are user-supplied samples of WWW::Mechanize in action.
If you have samples you'd like to contribute, please send 'em to
C<< <andy@petdance.com> >>.
You can also look at the F<t/*.t> files in the distribution.
Please note that these examples are not intended to do any specific task.
For all I know, they're no longer functional because the sites they
hit have changed. They're here to give examples of how people have
used WWW::Mechanize.
Note that the examples are in reverse order of my having received them,
so the freshest examples are always at the top.
=head2 Starbucks Density Calculator, by Nat Torkington
Here's a pair of programs from Nat Torkington, editor for O'Reilly Media
and co-author of the I<Perl Cookbook>.
=over 4
Rael [Dornfest] discovered that you can easily find out how many Starbucks
there are in an area by searching for "Starbucks". So I wrote a silly
scraper for some old census data and came up with some Starbucks density
figures. There's no meaning to these numbers thanks to errors from using
old census data coupled with false positives in Yahoo search (e.g.,
"Dodie Starbuck-Your Style Desgn" in Portland OR). But it was fun to
waste a night on.
Here are the top twenty cities in descending order of population,
with the amount of territory each Starbucks has. E.g., A New York NY
Starbucks covers 1.7 square miles of ground.
New York, NY 1.7
Los Angeles, CA 1.2
Chicago, IL 1.0
Houston, TX 4.6
Philadelphia, PA 6.8
San Diego, CA 2.7
Detroit, MI 19.9
Dallas, TX 2.7
Phoenix, AZ 4.1
San Antonio, TX 12.3
San Jose, CA 1.1
Baltimore, MD 3.9
Indianapolis, IN 12.1
San Francisco, CA 0.5
Jacksonville, FL 39.9
Columbus, OH 7.3
Milwaukee, WI 5.1
Memphis, TN 15.1
Washington, DC 1.4
Boston, MA 0.5
=back
C<get_pop_data>
#!/usr/bin/perl -w
use WWW::Mechanize;
use Storable;
$url = 'http://www.census.gov/population/www/documentation/twps0027.html';
$m = WWW::Mechanize->new();
$m->get($url);
$c = $m->content;
$c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
or die "Can't find the population table\n";
$t = $1;
@outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
shift @outer;
foreach $r (@outer) {
@bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
for ($x = 0; $x < @bits; $x++) {
$b = $bits[$x];
@v = split /\s*<BR>\s*/, $b;
foreach (@v) { s/^\s+//; s/\s+$// }
push @{$data[$x]}, @v;
}
}
for ($y = 0; $y < @{$data[0]}; $y++) {
$data{$data[1][$y]} = {
NAME => $data[1][$y],
RANK => $data[0][$y],
POP => comma_free($data[2][$y]),
AREA => comma_free($data[3][$y]),
DENS => comma_free($data[4][$y]),
};
}
store(\%data, "cities.dat");
sub comma_free {
my $n = shift;
$n =~ s/,//g;
return $n;
}
C<plague_of_coffee>
#!/usr/bin/perl -w
use WWW::Mechanize;
use strict;
use Storable;
$SIG{__WARN__} = sub {} ; # ssssssh
my $Cities = retrieve("cities.dat");
my $m = WWW::Mechanize->new();
$m->get("http://local.yahoo.com/");
my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
foreach my $c ( @cities ) {
my $fields = {
'stx' => "starbucks",
'csz' => $c,
};
my $r = $m->submit_form(form_number => 2,
fields => $fields);
die "Couldn't submit form" unless $r->is_success;
my $hits = number_of_hits($r);
# my $ppl = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
# print "$c has $hits Starbucks. That's one for every $ppl people.\n";
my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
print "$c : $density\n";
}
sub number_of_hits {
my $r = shift;
my $c = $r->content;
if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
return $1;
}
if ($c =~ m{Sorry, no .*? found in or near}) {
return 0;
}
if ($c =~ m{Your search matched multiple cities}) {
warn "Your search matched multiple cities\n";
return 0;
}
if ($c =~ m{Sorry we couldn.t find that location}) {
warn "No cities\n";
return 0;
}
if ($c =~ m{Could not find.*?, showing results for}) {
warn "No matches\n";
return 0;
}
die "Unknown response\n$c\n";
}
=head2 pb-upload, by John Beppu
This program takes filenames of images from the command line and
uploads them to a www.photobucket.com folder. John Beppu, the author, says:
=over 4
I had 92 pictures I wanted to upload, and doing it through a browser
would've been torture. But thanks to mech, all I had to do was
`./pb.upload *.jpg` and watch it do its thing. It felt good.
If I had more time, I'd implement WWW::Photobucket on top of
WWW::Mechanize.
=back
#!/usr/bin/perl -w -T
use strict;
use WWW::Mechanize;
my $login = "login_name";
my $password = "password";
my $folder = "folder";
my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
# login to your photobucket.com account
my $mech = WWW::Mechanize->new();
$mech->get($url);
$mech->submit_form(
form_number => 1,
fields => { password => $password },
);
die unless ($mech->success);
# upload image files specified on command line
foreach (@ARGV) {
print "$_\n";
$mech->form_number(2);
$mech->field('the_file[]' => $_);
$mech->submit();
}
=head2 listmod, by Ian Langworth
Ian Langworth contributes this little gem that will bring joy to
beleaguered mailing list admins. It discards spam messages through
mailman's web interface.
#!/arch/unix/bin/perl
use strict;
use warnings;
#
# listmod - fast alternative to mailman list interface
#
# usage: listmod crew XXXXXXXX
#
die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
my ($listname, $password) = @ARGV;
use CGI qw(unescape);
use WWW::Mechanize;
my $m = WWW::Mechanize->new( autocheck => 1 );
use Term::ReadLine;
my $term = Term::ReadLine->new($0);
# submit the form, get the cookie, go to the list admin page
$m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
$m->set_visible( $password );
$m->click;
# exit if nothing to do
print "There are no pending requests.\n" and exit
if $m->content =~ /There are no pending requests/;
# select the first form and examine its contents
$m->form_number(1);
my $f = $m->current_form or die "Couldn't get first form!\n";
# get me the base form element for each email item
my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
or die "Couldn't get items in first form!\n";
# iterate through items, prompt user, commit actions
foreach my $item (@items) {
# show item info
my $sender = unescape($item);
my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
=~ /Subject:\s+(.+?)\s+Size:/g;
# prompt user
my $choice = '';
while ( $choice !~ /^[DAX]$/ ) {
print "$sender\: '$subject'\n";
$choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
print "\n\n";
}
# set button
$m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
}
# submit actions
$m->click;
=head2 ccdl, by Andy Lester
Steve McConnell, author of the landmark I<Code Complete> has put
up the chapters for the 2nd edition in PDF format on his website.
I needed to download them to take to Kinko's to have printed. This
little program did it for me.
#!/usr/bin/perl -w
use strict;
use WWW::Mechanize;
my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get( $start );
my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
for my $link ( @links ) {
my $url = $link->url_abs;
my $filename = $url;
$filename =~ s[^.+/][];
print "Fetching $url";
$mech->get( $url, ':content_file' => $filename );
print " ", -s $filename, " bytes\n";
}
=head2 quotes.pl, by Andy Lester
This was a program that was going to get a hack in I<Spidering Hacks>,
but got cut at the last minute, probably because it's against IMDB's TOS
to scrape from it. I present it here as an example, not a suggestion
that you break their TOS.
Last I checked, it didn't work because their HTML didn't match, but it's
still good as sample code.
#!/usr/bin/perl -w
use strict;
use WWW::Mechanize;
use Getopt::Long;
use Text::Wrap;
my $match = undef;
my $random = undef;
GetOptions(
"match=s" => \$match,
"random" => \$random,
) or exit 1;
my $movie = shift @ARGV or die "Must specify a movie\n";
my $quotes_page = get_quotes_page( $movie );
my @quotes = extract_quotes( $quotes_page );
if ( $match ) {
$match = quotemeta($match);
@quotes = grep /$match/i, @quotes;
}
if ( $random ) {
print $quotes[rand @quotes];
}
else {
print join( "\n", @quotes );
}
sub get_quotes_page {
my $movie = shift;
my $mech = WWW::Mechanize->new;
$mech->get( "http://www.imdb.com/search" );
$mech->success or die "Can't get the search page";
$mech->submit_form(
form_number => 2,
fields => {
title => $movie,
restrict => "Movies only",
},
);
my @links = $mech->find_all_links( url_regex => qr[^/Title] )
or die "No matches for \"$movie\" were found.\n";
# Use the first link
my ( $url, $title ) = @{$links[0]};
warn "Checking $title...\n";
$mech->get( $url );
my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
or die qq{"$title" has no quotes in IMDB!\n};
warn "Fetching quotes...\n\n";
$mech->get( $link->[0] );
return $mech->content;
}
sub extract_quotes {
my $page = shift;
# Nibble away at the unwanted HTML at the beginning...
$page =~ s/.+Memorable Quotes//si;
$page =~ s/.+?(<a name)/$1/si;
# ... and the end of the page
$page =~ s/Browse titles in the movie quotes.+$//si;
$page =~ s/<p.+$//g;
# Quotes separated by an <HR> tag
my @quotes = split( /<hr.+?>/, $page );
for my $quote ( @quotes ) {
my @lines = split( /<br>/, $quote );
for ( @lines ) {
s/<[^>]+>//g; # Strip HTML tags
s/\s+/ /g; # Squash whitespace
s/^ //; # Strip leading space
s/ $//; # Strip trailing space
s/&#34;/"/g; # Replace HTML entity quotes
# Word-wrap to fit in 72 columns
$Text::Wrap::columns = 72;
$_ = wrap( '', ' ', $_ );
}
$quote = join( "\n", @lines );
}
return @quotes;
}
=head2 cpansearch.pl, by Ed Silva
A quick little utility to search the CPAN and fire up a browser
with a results page.
#!/usr/bin/perl
# turn on perl's safety features
use strict;
use warnings;
# work out the name of the module we're looking for
my $module_name = $ARGV[0]
or die "Must specify module name on command line";
# create a new browser
use WWW::Mechanize;
my $browser = WWW::Mechanize->new();
# tell it to get the main page
$browser->get("http://search.cpan.org/");
# okay, fill in the box with the name of the
# module we want to look up
$browser->form_number(1);
$browser->field("query", $module_name);
$browser->click();
# click on the link that matches the module name
$browser->follow_link( text_regex => $module_name );
my $url = $browser->uri;
# launch a browser...
system('galeon', $url);
exit(0);
=head2 lj_friends.cgi, by Matt Cashner
#!/usr/bin/perl
# Provides an rss feed of a paid user's LiveJournal friends list
# Full entries, protected entries, etc.
# Add to your favorite rss reader as
# http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
use warnings;
use strict;
use WWW::Mechanize;
use CGI;
my $cgi = CGI->new();
my $form = $cgi->Vars;
my $agent = WWW::Mechanize->new();
$agent->get('http://www.livejournal.com/login.bml');
$agent->form_number('3');
$agent->field('user',$form->{user});
$agent->field('password',$form->{password});
$agent->submit();
$agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
print "Content-type: text/plain\n\n";
print $agent->content();
=head2 Hacking Movable Type, by Dan Rinzel
use strict;
use WWW::Mechanize;
# a tool to automatically post entries to a moveable type weblog, and set arbitrary creation dates
my $mech = WWW::Mechanize->new();
my $entry;
$entry->{title} = "Test AutoEntry Title";
$entry->{btext} = "Test AutoEntry Body";
$entry->{date} = '2002-04-15 14:18:00';
my $start = qq|http://my.blog.site/mt.cgi|;
$mech->get($start);
$mech->field('username','und3f1n3d');
$mech->field('password','obscur3d');
$mech->submit(); # to get login cookie
$mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
$mech->form_name('entry_form');
$mech->field('title',$entry->{title});
$mech->field('category_id',1); # adjust as needed
$mech->field('text',$entry->{btext});
$mech->field('status',2); # publish, or 1 = draft
my $results = $mech->submit();
# if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
# we're done. Otherwise, time to be tricksy
# MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
# which takes the user to an editable version of the form where the create date can be edited
# MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
# travel the redirect
$results = $mech->get($results->{_headers}->{location});
$results->{_content} =~ /<body onLoad="([^\"]+)"/is;
my $js = $1;
$js =~ /\'([^']+)\'/;
$results = $mech->get($start.$1);
$mech->form_name('entry_form');
$mech->field('created_on_manual',$entry->{date});
$mech->submit();
}
=head2 get-despair, by Randal Schwartz
Randal submitted this bot that walks the despair.com site sucking down
all the pictures.
use strict;
$|++;
use WWW::Mechanize;
use File::Basename;
my $m = WWW::Mechanize->new;
$m->get("http://www.despair.com/indem.html");
my @top_links = @{$m->links};
for my $top_link_num (0..$#top_links) {
next unless $top_links[$top_link_num][0] =~ /^http:/;
$m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
print $m->uri, "\n";
for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
my $local = basename $image;
print " $image...", $m->mirror($image, $local)->message, "\n"
}
$m->back or die "can't go back";
}
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,464 @@
# PODNAME: WWW::Mechanize::FAQ
# ABSTRACT: Frequently Asked Questions about WWW::Mechanize
__END__
=pod
=encoding UTF-8
=head1 NAME
WWW::Mechanize::FAQ - Frequently Asked Questions about WWW::Mechanize
=head1 VERSION
version 2.03
=head1 How to get help with WWW::Mechanize
If your question isn't answered here in the FAQ, please turn to the
communities at:
=over
=item * StackOverflow L<https://stackoverflow.com/questions/tagged/www-mechanize>
=item * #lwp on irc.perl.org
=item * L<http://perlmonks.org>
=item * The libwww-perl mailing list at L<http://lists.perl.org>
=back
=head1 JavaScript
=head2 I have this web page that has JavaScript on it, and my Mech program doesn't work.
That's because WWW::Mechanize doesn't operate on the JavaScript. It only
understands the HTML parts of the page.
=head2 I thought Mech was supposed to work like a web browser.
It does pretty much, but it doesn't support JavaScript.
I added some basic attempts at picking up URLs in C<window.open()>
calls and return them in C<< $mech->links >>. They work sometimes.
Since Javascript is completely visible to the client, it cannot be used
to prevent a scraper from following links. But it can make life difficult. If
you want to scrape specific pages, then a solution is always possible.
One typical use of Javascript is to perform argument checking before
posting to the server. The URL you want is probably just buried in the
Javascript function. Do a regular expression match on
C<< $mech->content() >>
to find the link that you want and C<< $mech->get >> it directly (this
assumes that you know what you are looking for in advance).
In more difficult cases, the Javascript is used for URL mangling to
satisfy the needs of some middleware. In this case you need to figure
out what the Javascript is doing (why are these URLs always really
long?). There is probably some function with one or more arguments which
calculates the new URL. Step one: using your favorite browser, get the
before and after URLs and save them to files. Edit each file, converting
the argument separators ('?', '&' or ';') into newlines. Now it is
easy to use diff or comm to find out what Javascript did to the URL.
Step 2 - find the function call which created the URL - you will need
to parse and interpret its argument list. The Javascript Debugger in the
Firebug extension for Firefox helps with the analysis. At this point, it is
fairly trivial to write your own function which emulates the Javascript
for the pages you want to process.
Here's another approach that answers the question, "It works in Firefox,
but why not Mech?" Everything the web server knows about the client is
present in the HTTP request. If two requests are identical, the results
should be identical. So the real question is "What is different between
the mech request and the Firefox request?"
The Firefox extension "Tamper Data" is an effective tool for examining
the headers of the requests to the server. Compare that with what LWP
is sending. Once the two are identical, the action of the server should
be the same as well.
I say "should", because this is an oversimplification - some values
are naturally unique, e.g. a SessionID, but if a SessionID is present,
that is probably sufficient, even though the value will be different
between the LWP request and the Firefox request. The server could use
the session to store information which is troublesome, but that's not
the first place to look (and highly unlikely to be relevant when you
are requesting the login page of your site).
Generally the problem is to be found in missing or incorrect POSTDATA
arguments, Cookies, User-Agents, Accepts, etc. If you are using mech,
then redirects and cookies should not be a problem, but are listed here
for completeness. If you are missing headers, C<< $mech->add_header >>
can be used to add the headers that you need.
=head2 Which modules work like Mechanize and have JavaScript support?
In no particular order: L<Gtk2::WebKit::Mechanize>, L<Win32::IE::Mechanize>,
L<WWW::Mechanize::Firefox>, L<WWW::Scripter>, L<WWW::Selenium>
=head1 How do I do X?
=head2 Can I do [such-and-such] with WWW::Mechanize?
If it's possible with LWP::UserAgent, then yes. WWW::Mechanize is
a subclass of L<LWP::UserAgent>, so all the wondrous magic of that
class is inherited.
=head2 How do I use WWW::Mechanize through a proxy server?
See the docs in L<LWP::UserAgent> on how to use the proxy. Short version:
$mech->proxy(['http', 'ftp'], 'http://proxy.example.com:8000/');
or get the specs from the environment:
$mech->env_proxy();
# Environment set like so:
gopher_proxy=http://proxy.my.place/
wais_proxy=http://proxy.my.place/
no_proxy="localhost,my.domain"
export gopher_proxy wais_proxy no_proxy
=head2 How can I see what fields are on the forms?
Use the mech-dump utility, optionally installed with Mechanize.
$ mech-dump --forms http://search.cpan.org
Dumping forms
GET http://search.cpan.org/search
query=
mode=all (option) [*all|module|dist|author]
<NONAME>=CPAN Search (submit)
=head2 How do I get Mech to handle authentication?
use MIME::Base64;
my $agent = WWW::Mechanize->new();
my @args = (
Authorization => "Basic " .
MIME::Base64::encode( USER . ':' . PASS )
);
$agent->credentials( ADDRESS, REALM, USER, PASS );
$agent->get( URL, @args );
If you want to use the credentials for all future requests, you can
also use the L<LWP::UserAgent> C<default_header()> method instead
of the extra arguments to C<get()>
$mech->default_header(
Authorization => 'Basic ' . encode_base64( USER . ':' . PASSWORD ) );
=head2 How can I get WWW::Mechanize to execute this JavaScript?
You can't. JavaScript is entirely client-based, and WWW::Mechanize
is a client that doesn't understand JavaScript. See the top part
of this FAQ.
=head2 How do I check a checkbox that doesn't have a value defined?
Set it to the value of "on".
$mech->field( my_checkbox => 'on' );
=head2 How do I handle frames?
You don't deal with them as frames, per se, but as links. Extract
them with
my @frame_links = $mech->find_all_links( tag => "frame" );
=head2 How do I get a list of HTTP headers and their values?
All L<HTTP::Headers> methods work on a L<HTTP::Response> object which is
returned by the I<get()>, I<reload()>, I<response()/res()>, I<click()>,
I<submit_form()>, and I<request()> methods.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get( 'http://my.site.com' );
my $response = $mech->response();
for my $key ( $response->header_field_names() ) {
print $key, " : ", $response->header( $key ), "\n";
}
=head2 How do I enable keep-alive?
Since L<WWW::Mechanize> is a subclass of L<LWP::UserAgent>, you can
use the same mechanism to enable keep-alive:
use LWP::ConnCache;
...
$mech->conn_cache(LWP::ConnCache->new);
=head2 How can I change/specify the action parameter of an HTML form?
You can access the action of the form by utilizing the L<HTML::Form>
object returned from one of the specifying form methods.
Using C<< $mech->form_number($number) >>:
my $mech = WWW::Mechanize->new;
$mech->get('http://someurlhere.com');
# Access the form using its Zero-Based Index by DOM order
$mech->form_number(0)->action('http://newAction'); #ABS URL
Using C<< $mech->form_name($number) >>:
my $mech = WWW::Mechanize->new;
$mech->get('http://someurlhere.com');
#Access the form using its Zero-Based Index by DOM order
$mech->form_name('trgForm')->action('http://newAction'); #ABS URL
=head2 How do I save an image? How do I save a large tarball?
An image is just content. You get the image and save it.
$mech->get( 'photo.jpg' );
$mech->save_content( '/path/to/my/directory/photo.jpg' );
You can also save any content directly to disk using the C<:content_file>
flag to C<get()>, which is part of L<LWP::UserAgent>.
$mech->get( 'http://www.cpan.org/src/stable.tar.gz',
':content_file' => 'stable.tar.gz' );
=head2 How do I pick a specific value from a C<< <select> >> list?
Find the C<HTML::Form::ListInput> in the page.
my ($listbox) = $mech->find_all_inputs( name => 'listbox' );
Then create a hash for the lookup:
my %name_lookup;
@name_lookup{ $listbox->value_names } = $listbox->possible_values;
my $value = $name_lookup{ 'Name I want' };
If you have duplicate names, this method won't work, and you'll
have to loop over C<< $listbox->value_names >> and
C<< $listbox->possible_values >> in parallel until you find a
matching name.
=head2 How do I get Mech to not follow redirects?
You use functionality in LWP::UserAgent, not Mech itself.
$mech->requests_redirectable( [] );
Or you can set C<max_redirect>:
$mech->max_redirect( 0 );
Both these options can also be set in the constructor. Mech doesn't
understand them, so will pass them through to the LWP::UserAgent
constructor.
=head1 Why doesn't this work: Debugging your Mechanize program
=head2 My Mech program doesn't work, but it works in the browser.
Mechanize acts like a browser, but apparently something you're doing
is not matching the browser's behavior. Maybe it's expecting a
certain web client, or maybe you're not handling a field properly.
For some reason, your Mech program isn't doing exactly what the
browser is doing, and when you find that, you'll have the answer.
=head2 My Mech program gets these 500 errors.
A 500 error from the web server says that the program on the server
side died. Probably the web server program was expecting certain
inputs that you didn't supply, and instead of handling it nicely,
the program died.
Whatever the cause of the 500 error, if it works in the browser,
but not in your Mech program, you're not acting like the browser.
See the previous question.
=head2 Why doesn't my program handle this form correctly?
Run F<mech-dump> on your page and see what it says.
F<mech-dump> is a marvelous diagnostic tool for figuring out what forms
and fields are on the page. Say you're scraping CNN.com, you'd get this:
$ mech-dump http://www.cnn.com/
GET http://search.cnn.com/cnn/search
source=cnn (hidden readonly)
invocationType=search/top (hidden readonly)
sites=web (radio) [*web/The Web ??|cnn/CNN.com ??]
query= (text)
<NONAME>=Search (submit)
POST http://cgi.money.cnn.com/servlets/quote_redirect
query= (text)
<NONAME>=GET (submit)
POST http://polls.cnn.com/poll
poll_id=2112 (hidden readonly)
question_1=<UNDEF> (radio) [1/Simplistic option|2/VIEW RESULTS]
<NONAME>=VOTE (submit)
GET http://search.cnn.com/cnn/search
source=cnn (hidden readonly)
invocationType=search/bottom (hidden readonly)
sites=web (radio) [*web/??CNN.com|cnn/??]
query= (text)
<NONAME>=Search (submit)
Four forms, including the first one duplicated at the end. All the
fields, all their defaults, lovingly generated by HTML::Form's C<dump>
method.
If you want to run F<mech-dump> on something that doesn't lend itself
to a quick URL fetch, then use the C<save_content()> method to write
the HTML to a file, and run F<mech-dump> on the file.
=head2 Why don't https:// URLs work?
You need either L<IO::Socket::SSL> or L<Crypt::SSLeay> installed.
=head2 Why do I get "Input 'fieldname' is readonly"?
You're trying to change the value of a hidden field and you have
warnings on.
First, make sure that you actually mean to change the field that you're
changing, and that you don't have a typo. Usually, hidden variables are
set by the site you're working on for a reason. If you change the value,
you might be breaking some functionality by faking it out.
If you really do want to change a hidden value, make the changes in a
scope that has warnings turned off:
{
local $^W = 0;
$agent->field( name => $value );
}
=head2 I tried to [such-and-such] and I got this weird error.
Are you checking your errors?
Are you sure?
Are you checking that your action succeeded after every action?
Are you sure?
For example, if you try this:
$mech->get( "http://my.site.com" );
$mech->follow_link( "foo" );
and the C<get> call fails for some reason, then the Mech internals
will be unusable for the C<follow_link> and you'll get a weird
error. You B<must>, after every action that GETs or POSTs a page,
check that Mech succeeded, or all bets are off.
$mech->get( "http://my.site.com" );
die "Can't even get the home page: ", $mech->response->status_line
unless $mech->success;
$mech->follow_link( "foo" );
die "Foo link failed: ", $mech->response->status_line
unless $mech->success;
=head2 How do I figure out why C<< $mech->get($url) >> doesn't work?
There are many reasons why a C<< get() >> can fail. The server can take
you to someplace you didn't expect. It can generate redirects which are
not properly handled. You can get time-outs. Servers are down more often
than you think! etc, etc, etc. A couple of places to start:
=over 4
=item 1 Check C<< $mech->status() >> after each call
=item 2 Check the URL with C<< $mech->uri() >> to see where you ended up
=item 3 Try debugging with C<< LWP::ConsoleLogger >>.
=back
If things are really strange, turn on debugging with
C<< use LWP::ConsoleLogger::Everywhere; >>
Just put this in the main program. This causes LWP to print out a trace
of the HTTP traffic between client and server and can be used to figure
out what is happening at the protocol level.
It is also useful to set many traps to verify that processing is
proceeding as expected. A Mech program should always have an "I didn't
expect to get here" or "I don't recognize the page that I am processing"
case and bail out.
Since errors can be transient, by the time you notice that the error
has occurred, it might not be possible to reproduce it manually. So
for automated processing it is useful to email yourself the following
information:
=over 4
=item * where processing is taking place
=item * An Error Message
=item * $mech->uri
=item * $mech->content
=back
You can also save the content of the page with C<< $mech->save_content( 'filename.html' ); >>
=head2 I submitted a form, but the server ignored everything! I got an empty form back!
The post is handled by application software. It is common for PHP
programmers to use the same file both to display a form and to process
the arguments returned. So the first task of the application programmer
is to decide whether there are arguments to process. The program can
check whether a particular parameter has been set, whether a hidden
parameter has been set, or whether the submit button has been clicked.
(There are probably other ways that I haven't thought of).
In any case, if your form is not setting the parameter (e.g. the submit
button) which the web application is keying on (and as an outsider there
is no way to know what it is keying on), it will not notice that the form
has been submitted. Try using C<< $mech->click() >> instead of
C<< $mech->submit() >> or vice-versa.
=head2 I've logged in to the server, but I get 500 errors when I try to get to protected content.
Some web sites use distributed databases for their processing. It
can take a few seconds for the login/session information to percolate
through to all the servers. For human users with their slow reaction
times, this is not a problem, but a Perl script can outrun the server.
So try adding a C<sleep(5)> between logging in and actually doing anything
(the optimal delay must be determined experimentally).
=head2 Mech is a big memory pig! I'm running out of RAM!
Mech keeps a history of every page, and the state it was in. It actually
keeps a clone of the full Mech object at every step along the way.
You can limit this stack size with the C<stack_depth> param in the C<new()>
constructor. If you set C<stack_depth> to 0, Mech will not keep any history.
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,154 @@
package WWW::Mechanize::Image;

use strict;
use warnings;

our $VERSION = '2.03';

#ABSTRACT: Image object for WWW::Mechanize

# Constructor.  Accepts a hashref of attributes; C<url> and C<tag> are
# mandatory keys, although either may hold undef (which is why presence
# is tested with exists() rather than defined()).
sub new {
    my ( $class, $params ) = @_;
    $params ||= {};

    my %fields;
    for my $key (qw( url base tag height width alt name attrs )) {
        # Copy only keys the caller actually supplied, even undef ones.
        next unless exists $params->{$key};
        $fields{$key} = $params->{$key};
    }

    # url and tag are always required
    for my $required (qw( url tag )) {
        exists $fields{$required}
            or die "WWW::Mechanize::Image->new must have a $required argument";
    }

    return bless \%fields, $class;
}

# Simple read-only accessors for the stored attributes.
sub url    { my $self = shift; return $self->{url}; }
sub base   { my $self = shift; return $self->{base}; }
sub name   { my $self = shift; return $self->{name}; }
sub tag    { my $self = shift; return $self->{tag}; }
sub height { my $self = shift; return $self->{height}; }
sub width  { my $self = shift; return $self->{width}; }
sub alt    { my $self = shift; return $self->{alt}; }
sub attrs  { my $self = shift; return $self->{attrs}; }

# URL as a URI::URL object (loaded lazily), relative to base().
sub URI {
    my $self = shift;
    require URI::URL;
    return URI::URL->new( $self->url, $self->base );
}

# Absolute form of the URL.
sub url_abs {
    my $self = shift;
    return $self->URI->abs;
}

1;
__END__
=pod
=encoding UTF-8
=head1 NAME
WWW::Mechanize::Image - Image object for WWW::Mechanize
=head1 VERSION
version 2.03
=head1 SYNOPSIS
Image object to encapsulate all the stuff that Mech needs
=head1 Constructor
=head2 new()
Creates and returns a new C<WWW::Mechanize::Image> object.
my $image = WWW::Mechanize::Image->new( {
url => $url,
base => $base,
tag => $tag,
name => $name, # From the INPUT tag
height => $height, # optional
width => $width, # optional
alt => $alt, # optional
attrs => $attr_ref, # optional
} );
=head1 Accessors
=head2 $image->url()
Image URL from the C<src> attribute of the source tag.
May be C<undef> if source tag has no C<src> attribute.
=head2 $image->base()
Base URL to which the links are relative.
=head2 $image->name()
Name for the field from the NAME attribute, if any.
=head2 $image->tag()
Tag name (either "image" or "input")
=head2 $image->height()
Image height
=head2 $image->width()
Image width
=head2 $image->alt()
ALT attribute from the source tag, if any.
=head2 $image->attrs()
Hash ref of all the attributes and attribute values in the tag.
=head2 $image->URI()
Returns the URL as a L<URI::URL> object.
=head2 $image->url_abs()
Returns the URL as an absolute URL string.
=head1 SEE ALSO
L<WWW::Mechanize> and L<WWW::Mechanize::Link>
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,143 @@
package WWW::Mechanize::Link;

use strict;
use warnings;

our $VERSION = '2.03';

#ABSTRACT: Link object for WWW::Mechanize

# Constructor.  Accepts either a hashref of named attributes or, for
# compatibility with older code, a positional list.  Internally a link
# is a blessed array ref; the slot order (url, text, name, tag, base,
# attrs) is part of that compatibility contract and must not change.
sub new {
    my $class = shift;

    my @slots;
    if ( ref $_[0] eq 'HASH' ) {
        # Hash slice pulls the six fields out in the fixed slot order;
        # missing keys become undef slots.
        @slots = @{ $_[0] }{qw( url text name tag base attrs )};
    }
    else {
        @slots = @_;
    }

    return bless \@slots, $class;
}

# Read-only accessors, one per fixed slot.
sub url   { my $self = shift; return $self->[0]; }
sub text  { my $self = shift; return $self->[1]; }
sub name  { my $self = shift; return $self->[2]; }
sub tag   { my $self = shift; return $self->[3]; }
sub base  { my $self = shift; return $self->[4]; }
sub attrs { my $self = shift; return $self->[5]; }

# URL as a URI::URL object (loaded lazily), relative to base().
sub URI {
    my $self = shift;
    require URI::URL;
    return URI::URL->new( $self->url, $self->base );
}

# Absolute form of the URL.
sub url_abs {
    my $self = shift;
    return $self->URI->abs;
}

1;
__END__
=pod
=encoding UTF-8
=head1 NAME
WWW::Mechanize::Link - Link object for WWW::Mechanize
=head1 VERSION
version 2.03
=head1 SYNOPSIS
Link object to encapsulate all the stuff that Mech needs but nobody
wants to deal with as an array.
=head1 Constructor
=head2 new()
my $link = WWW::Mechanize::Link->new( {
url => $url,
text => $text,
name => $name,
tag => $tag,
base => $base,
attrs => $attr_href,
} );
For compatibility, this older interface is also supported:
new( $url, $text, $name, $tag, $base, $attr_href )
Creates and returns a new C<WWW::Mechanize::Link> object.
=head1 Accessors
=head2 $link->url()
URL from the link
=head2 $link->text()
Text of the link
=head2 $link->name()
NAME attribute from the source tag, if any.
=head2 $link->tag()
Tag name (one of: "a", "area", "frame", "iframe" or "meta").
=head2 $link->base()
Base URL to which the links are relative.
=head2 $link->attrs()
Returns hash ref of all the attributes and attribute values in the tag.
=head2 $link->URI()
Returns the URL as a L<URI::URL> object.
=head2 $link->url_abs()
Returns a L<URI::URL> object for the absolute form of the string.
=head1 SEE ALSO
L<WWW::Mechanize> and L<WWW::Mechanize::Image>
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut

View File

@@ -0,0 +1,453 @@
package WWW::RobotRules;

# NOTE: $VERSION is assigned before "use strict" takes effect below, so
# this plain package-variable assignment is intentional legacy style.
$VERSION = "6.02";
sub Version { $VERSION; }    # legacy accessor for the module version

use strict;
use URI ();
# Constructor: new($class, $user_agent_name)
# The base class is effectively abstract; constructing it directly hands
# back the in-core implementation for backwards compatibility.
sub new {
    my ( $class, $ua ) = @_;

    # This ugly hack is needed to ensure backwards compatibility.
    $class = "WWW::RobotRules::InCore" if $class eq "WWW::RobotRules";

    my $self = bless {}, $class;
    $self->agent($ua);
    return $self;
}
# parse($robot_txt_uri, $txt, $fresh_until)
#
# Parse the contents ($txt) of a robots.txt file fetched from
# $robot_txt_uri and record the Disallow rules that apply to this robot
# for that host:port.  Previously stored rules for the netloc are
# discarded first.  The rules stay fresh until $fresh_until (defaults
# to one year from now).
sub parse {
    my($self, $robot_txt_uri, $txt, $fresh_until) = @_;
    $robot_txt_uri = URI->new("$robot_txt_uri");
    my $netloc = $robot_txt_uri->host . ":" . $robot_txt_uri->port;

    $self->clear_rules($netloc);
    $self->fresh_until($netloc, $fresh_until || (time + 365*24*3600));

    my $ua;
    my $is_me = 0;           # 1 iff this record is for me
    my $is_anon = 0;         # 1 iff this record is for *
    my $seen_disallow = 0;   # watch for missing record separators
    my @me_disallowed = ();   # rules disallowed for me
    my @anon_disallowed = (); # rules disallowed for *

    # blank lines are significant, so turn CRLF into LF to avoid generating
    # false ones
    $txt =~ s/\015\012/\012/g;

    # split at \012 (LF) or \015 (CR) (Mac text files have just CR for EOL)
    for(split(/[\012\015]/, $txt)) {

        # Lines containing only a comment are discarded completely, and
        # therefore do not indicate a record boundary.
        next if /^\s*\#/;

        s/\s*\#.*//;    # remove comments at end-of-line

        if (/^\s*$/) {      # blank line ends a record
            last if $is_me; # That was our record. No need to read the rest.
            $is_anon = 0;
            $seen_disallow = 0;
        }
        elsif (/^\s*User-Agent\s*:\s*(.*)/i) {
            $ua = $1;
            $ua =~ s/\s+$//;

            if ($seen_disallow) {
                # A User-Agent after a Disallow means the record separator
                # (blank line) was omitted: treat as start of a new record.
                $seen_disallow = 0;
                last if $is_me; # That was our record. No need to read the rest.
                $is_anon = 0;
            }

            if ($is_me) {
                # This record already had a User-agent that
                # we matched, so just continue.
            }
            elsif ($ua eq '*') {
                $is_anon = 1;
            }
            elsif($self->is_me($ua)) {
                $is_me = 1;
            }
        }
        elsif (/^\s*Disallow\s*:\s*(.*)/i) {
            unless (defined $ua) {
                warn "RobotRules <$robot_txt_uri>: Disallow without preceding User-agent\n" if $^W;
                $is_anon = 1;  # assume that User-agent: * was intended
            }
            my $disallow = $1;
            $disallow =~ s/\s+$//;
            $seen_disallow = 1;
            if (length $disallow) {
                # A full URL in a Disallow line only applies if it points
                # back at the same scheme/host/port as the robots.txt
                # itself; otherwise the rule is ignored.
                my $ignore;
                eval {
                    my $u = URI->new_abs($disallow, $robot_txt_uri);
                    $ignore++ if $u->scheme ne $robot_txt_uri->scheme;
                    $ignore++ if lc($u->host) ne lc($robot_txt_uri->host);
                    $ignore++ if $u->port ne $robot_txt_uri->port;
                    $disallow = $u->path_query;
                    $disallow = "/" unless length $disallow;
                };
                next if $@;
                next if $ignore;
            }

            if ($is_me) {
                push(@me_disallowed, $disallow);
            }
            elsif ($is_anon) {
                push(@anon_disallowed, $disallow);
            }
        }
        elsif (/\S\s*:/) {
            # Unrecognized field: ignore per the robots.txt spec.
        }
        else {
            warn "RobotRules <$robot_txt_uri>: Malformed record: <$_>\n" if $^W;
        }
    }

    # A record naming this robot takes precedence over the '*' record.
    if ($is_me) {
        $self->push_rules($netloc, @me_disallowed);
    }
    else {
        $self->push_rules($netloc, @anon_disallowed);
    }
}
#
# Returns TRUE if the given name matches the
# name of this robot
#
sub is_me {
    my ( $self, $ua_line ) = @_;
    my $me = $self->agent;

    # See whether my short-name is a substring of the
    # "User-Agent: ..." line that we were passed.
    # BUGFIX: the original had the index() arguments reversed
    # (index(lc($me), lc($ua_line))), which asked whether the User-Agent
    # line was a substring of our name — so a record like
    # "User-Agent: FooBot" never matched unless the strings were equal.
    if ( index( lc($ua_line), lc($me) ) >= 0 ) {
        return 1;
    }
    else {
        return '';
    }
}
# allowed($uri)
#
# Returns 1 if this robot may fetch $uri, 0 if a Disallow rule blocks
# it, and -1 if the cached rules for the host are missing or stale
# (i.e. robots.txt should be re-fetched).
sub allowed {
    my ( $self, $uri ) = @_;
    $uri = URI->new("$uri");

    # Robots.txt applies only to HTTP(S) resources.
    return 1 unless $uri->scheme eq 'http' or $uri->scheme eq 'https';

    my $netloc      = $uri->host . ":" . $uri->port;
    my $fresh_until = $self->fresh_until($netloc);
    return -1 if !defined($fresh_until) || $fresh_until < time;

    my $path = $uri->path_query;
    for my $rule ( $self->rules($netloc) ) {
        return 1 unless length $rule;             # empty Disallow allows everything
        return 0 if index( $path, $rule ) == 0;   # rule is a path prefix
    }
    return 1;
}
# The following methods must be provided by the subclass.
sub agent;        # get/set the robot's short name
sub visit;        # record a visit to a netloc
sub no_visits;    # number of recorded visits to a netloc
sub last_visits;  # NOTE(review): InCore implements last_visit (singular) — verify
sub fresh_until;  # get/set rule-expiry time for a netloc
sub push_rules;   # append disallow rules for a netloc
sub clear_rules;  # drop all rules for a netloc
sub rules;        # list disallow rules for a netloc
sub dump;         # debug dump of the object state
package WWW::RobotRules::InCore;
use vars qw(@ISA);
@ISA = qw(WWW::RobotRules);
# agent([$name])
#
# Get/set the robot's short name.  Returns the previous name.  Setting
# a different name invalidates all cached rules and visit records,
# since they were gathered on behalf of the old identity.
sub agent {
    my ( $self, $name ) = @_;
    my $previous = $self->{'ua'};

    if ($name) {
        # Reduce a full agent string to its bare short name, e.g.
        #   "FooBot/1.2 [http://foobot.int; foo@bot.int]" -> "FooBot"
        $name = $1 if $name =~ m/(\S+)/;    # keep the first word only
        $name =~ s!/.*!!;                   # strip the version part

        unless ( $previous && $previous eq $name ) {
            delete $self->{'loc'};          # all old info is now stale
            $self->{'ua'} = $name;
        }
    }
    return $previous;
}
# visit($netloc [, $time])
#
# Record a visit to $netloc at epoch $time (defaults to now): bumps the
# visit counter and remembers the time of the visit.
sub visit {
    my ( $self, $netloc, $time ) = @_;
    return unless $netloc;

    $time ||= time;
    $self->{'loc'}{$netloc}{'last'} = $time;

    my $count_ref = \$self->{'loc'}{$netloc}{'count'};
    defined $$count_ref ? $$count_ref++ : ( $$count_ref = 1 );
}
# Number of recorded visits to $netloc (undef if never visited).
sub no_visits {
    my ( $self, $netloc ) = @_;
    return $self->{'loc'}{$netloc}{'count'};
}

# Epoch time of the most recent recorded visit to $netloc.
sub last_visit {
    my ( $self, $netloc ) = @_;
    return $self->{'loc'}{$netloc}{'last'};
}

# Get the rule-expiry time for $netloc; optionally set a new one.
# Returns the previous value.
sub fresh_until {
    my ( $self, $netloc, $fresh_until ) = @_;
    my $previous = $self->{'loc'}{$netloc}{'fresh'};
    $self->{'loc'}{$netloc}{'fresh'} = $fresh_until if defined $fresh_until;
    return $previous;
}
# Append disallow @rules for $netloc.
sub push_rules {
    my ( $self, $netloc, @rules ) = @_;
    push @{ $self->{'loc'}{$netloc}{'rules'} }, @rules;
}

# Forget every rule recorded for $netloc.
sub clear_rules {
    my ( $self, $netloc ) = @_;
    delete $self->{'loc'}{$netloc}{'rules'};
}

# All disallow rules recorded for $netloc (empty list if none).
sub rules {
    my ( $self, $netloc ) = @_;
    my $stored = $self->{'loc'}{$netloc}{'rules'};
    return defined $stored ? @{$stored} : ();
}
# Debug helper: print the scalar attributes of the object, then one
# line per known netloc listing its recorded rules.
sub dump {
    my $self = shift;

    for my $key ( keys %$self ) {
        next if $key eq 'loc';    # handled separately below
        print "$key = $self->{$key}\n";
    }

    for my $netloc ( keys %{ $self->{'loc'} } ) {
        my @rules = $self->rules($netloc);
        print "$netloc: ", join( "; ", @rules ), "\n";
    }
}
1;
__END__
# Bender: "Well, I don't have anything else
# planned for today. Let's get drunk!"
=head1 NAME
WWW::RobotRules - database of robots.txt-derived permissions
=head1 SYNOPSIS
use WWW::RobotRules;
my $rules = WWW::RobotRules->new('MOMspider/1.0');
use LWP::Simple qw(get);
{
my $url = "http://some.place/robots.txt";
my $robots_txt = get $url;
$rules->parse($url, $robots_txt) if defined $robots_txt;
}
{
my $url = "http://some.other.place/robots.txt";
my $robots_txt = get $url;
$rules->parse($url, $robots_txt) if defined $robots_txt;
}
# Now we can check if a URL is valid for those servers
# whose "robots.txt" files we've gotten and parsed:
if($rules->allowed($url)) {
$c = get $url;
...
}
=head1 DESCRIPTION
This module parses F</robots.txt> files as specified in
"A Standard for Robot Exclusion", at
<http://www.robotstxt.org/wc/norobots.html>
Webmasters can use the F</robots.txt> file to forbid conforming
robots from accessing parts of their web site.
The parsed files are kept in a WWW::RobotRules object, and this object
provides methods to check if access to a given URL is prohibited. The
same WWW::RobotRules object can be used for one or more parsed
F</robots.txt> files on any number of hosts.
The following methods are provided:
=over 4
=item $rules = WWW::RobotRules->new($robot_name)
This is the constructor for WWW::RobotRules objects. The first
argument given to new() is the name of the robot.
=item $rules->parse($robot_txt_url, $content, $fresh_until)
The parse() method takes as arguments the URL that was used to
retrieve the F</robots.txt> file, and the contents of the file.
=item $rules->allowed($uri)
Returns TRUE if this robot is allowed to retrieve this URL.
=item $rules->agent([$name])
Get/set the agent name. NOTE: Changing the agent name will clear the robots.txt
rules and expire times out of the cache.
=back
=head1 ROBOTS.TXT
The format and semantics of the "/robots.txt" file are as follows
(this is an edited abstract of
<http://www.robotstxt.org/wc/norobots.html>):
The file consists of one or more records separated by one or more
blank lines. Each record contains lines of the form
<field-name>: <value>
The field name is case insensitive. Text after the '#' character on a
line is ignored during parsing. This is used for comments. The
following <field-names> can be used:
=over 3
=item User-Agent
The value of this field is the name of the robot the record is
describing access policy for. If more than one I<User-Agent> field is
present the record describes an identical access policy for more than
one robot. At least one field needs to be present per record. If the
value is '*', the record describes the default access policy for any
robot that has not matched any of the other records.
The I<User-Agent> fields must occur before the I<Disallow> fields. If a
record contains a I<User-Agent> field after a I<Disallow> field, that
constitutes a malformed record. This parser will assume that a blank
line should have been placed before that I<User-Agent> field, and will
break the record into two. All the fields before the I<User-Agent> field
will constitute a record, and the I<User-Agent> field will be the first
field in a new record.
=item Disallow
The value of this field specifies a partial URL that is not to be
visited. This can be a full path, or a partial path; any URL that
starts with this value will not be retrieved.
=back
Unrecognized records are ignored.
=head1 ROBOTS.TXT EXAMPLES
The following example "/robots.txt" file specifies that no robots
should visit any URL starting with "/cyberworld/map/" or "/tmp/":
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
This example "/robots.txt" file specifies that no robots should visit
any URL starting with "/cyberworld/map/", except the robot called
"cybermapper":
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
This example indicates that no robots should visit this site further:
# go away
User-agent: *
Disallow: /
This is an example of a malformed robots.txt file.
# robots.txt for ancientcastle.example.com
# I've locked myself away.
User-agent: *
Disallow: /
# The castle is your home now, so you can go anywhere you like.
User-agent: Belle
Disallow: /west-wing/ # except the west wing!
# It's good to be the Prince...
User-agent: Beast
Disallow:
This file is missing the required blank lines between records.
However, the intention is clear.
=head1 SEE ALSO
L<LWP::RobotUA>, L<WWW::RobotRules::AnyDBM_File>
=head1 COPYRIGHT
Copyright 1995-2009, Gisle Aas
Copyright 1995, Martijn Koster
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

View File

@@ -0,0 +1,170 @@
package WWW::RobotRules::AnyDBM_File;
require WWW::RobotRules;
@ISA = qw(WWW::RobotRules);
$VERSION = "6.00";
use Carp ();
use AnyDBM_File;
use Fcntl;
use strict;
=head1 NAME
WWW::RobotRules::AnyDBM_File - Persistent RobotRules
=head1 SYNOPSIS
require WWW::RobotRules::AnyDBM_File;
require LWP::RobotUA;
# Create a robot useragent that uses a diskcaching RobotRules
my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' );
my $ua = WWW::RobotUA->new( 'my-robot/1.0', 'me@foo.com', $rules );
# Then just use $ua as usual
$res = $ua->request($req);
=head1 DESCRIPTION
This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
package to implement persistent diskcaching of F<robots.txt> and host
visit information.
The constructor (the new() method) takes an extra argument specifying
the name of the DBM file to use. If the DBM file already exists, then
you can specify undef as agent name as the name can be obtained from
the DBM database.
=cut
# Constructor: new($class, $agent_name, $dbm_filename)
#
# Ties the rules cache to a DBM file.  The agent name may be undef if
# the DBM file already exists and stores one.
sub new {
    my ( $class, $ua, $file ) = @_;

    Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;

    my $self = bless {}, $class;
    $self->{'filename'} = $file;
    tie %{ $self->{'dbm'} }, 'AnyDBM_File', $file, O_CREAT | O_RDWR, 0640
        or Carp::croak("Can't open $file: $!");

    if ($ua) {
        $self->agent($ua);
    }
    else {
        # Try to obtain name from DBM file
        $ua = $self->{'dbm'}{"|ua-name|"};
        Carp::croak("No agent name specified") unless $ua;
    }
    return $self;
}
# agent([$newname])
#
# Get/set the robot name stored in the DBM file.  Returns the previous
# name.  A changed name truncates the DBM file, discarding all cached
# rules and visit records.
sub agent {
    my ( $self, $newname ) = @_;
    my $old = $self->{'dbm'}{"|ua-name|"};

    if ( defined $newname ) {
        # Drop a trailing version number, e.g. "FooBot/1.2" -> "FooBot".
        # NOTE(review): the unescaped '.' matches any character here —
        # presumably '\.' was intended; preserved for compatibility.
        $newname =~ s!/?\s*\d+.\d+\s*$!!;

        unless ( $old && $old eq $newname ) {
            # Old info is now stale: rebuild the DBM from scratch.
            my $file = $self->{'filename'};
            untie %{ $self->{'dbm'} };
            tie %{ $self->{'dbm'} }, 'AnyDBM_File', $file, O_TRUNC | O_RDWR, 0640;
            %{ $self->{'dbm'} } = ();
            $self->{'dbm'}{"|ua-name|"} = $newname;
        }
    }
    return $old;
}
# Visit count for $netloc, or 0 if it was never visited.  The DBM value
# is "count; time; cleartext", so take the first field.
sub no_visits {
    my ( $self, $netloc ) = @_;
    my $record = $self->{'dbm'}{"$netloc|vis"};
    return 0 unless $record;
    return ( split( /;\s*/, $record ) )[0];
}

# Epoch time of the most recent visit to $netloc, or undef.
sub last_visit {
    my ( $self, $netloc ) = @_;
    my $record = $self->{'dbm'}{"$netloc|vis"};
    return undef unless $record;
    return ( split( /;\s*/, $record ) )[1];
}
# Get the rule-expiry time for $netloc; optionally set a new one.
# Returns the previous epoch value.  The DBM stores "epoch; cleartext".
sub fresh_until {
    my ( $self, $netloc, $fresh ) = @_;

    my $old = $self->{'dbm'}{"$netloc|exp"};
    $old =~ s/;.*// if $old;    # strip the human-readable rendering

    if ( defined $fresh ) {
        # Keep a cleartext timestamp alongside the epoch for debugging.
        $fresh .= "; " . localtime($fresh);
        $self->{'dbm'}{"$netloc|exp"} = $fresh;
    }
    return $old;
}
# visit($netloc [, $time])
#
# Record a visit: bump the stored count and update the stored time.
# The recorded time never moves backwards.
sub visit {
    my ( $self, $netloc, $time ) = @_;
    $time ||= time;

    my $count = 0;
    if ( my $old = $self->{'dbm'}{"$netloc|vis"} ) {
        my $last;
        ( $count, $last ) = split( /;\s*/, $old );
        $time = $last if $last > $time;
    }
    $count++;
    $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
}
# Append @rules for $netloc, continuing the numbered-key sequence
# "$netloc|r1", "$netloc|r2", ...
sub push_rules {
    my ( $self, $netloc, @rules ) = @_;

    # Find the first unused slot.
    my $slot = 1;
    $slot++ while $self->{'dbm'}{"$netloc|r$slot"};

    for my $rule (@rules) {
        $self->{'dbm'}{"$netloc|r$slot"} = $rule;
        $slot++;
    }
}

# Delete every numbered rule entry for $netloc.
sub clear_rules {
    my ( $self, $netloc ) = @_;
    my $slot = 1;
    while ( $self->{'dbm'}{"$netloc|r$slot"} ) {
        delete $self->{'dbm'}{"$netloc|r$slot"};
        $slot++;
    }
}

# All rules stored for $netloc, in insertion order.
sub rules {
    my ( $self, $netloc ) = @_;
    my @rules;
    for ( my $slot = 1; ; $slot++ ) {
        my $rule = $self->{'dbm'}{"$netloc|r$slot"};
        last unless $rule;
        push @rules, $rule;
    }
    return @rules;
}
# Deliberate no-op override of WWW::RobotRules::InCore::dump: the DBM
# backend keeps its state on disk, so there is nothing to print here.
sub dump
{
}
1;
=head1 SEE ALSO
L<WWW::RobotRules>, L<LWP::RobotUA>
=head1 AUTHORS
Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
=cut