Initial Commit
This commit is contained in:
379
database/perl/vendor/lib/CPAN/SQLite/Index.pm
vendored
Normal file
379
database/perl/vendor/lib/CPAN/SQLite/Index.pm
vendored
Normal file
@@ -0,0 +1,379 @@
|
||||
# $Id: Index.pm 84 2020-05-31 06:29:34Z stro $
|
||||
|
||||
package CPAN::SQLite::Index;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
our $VERSION = '0.219';
|
||||
|
||||
use English qw/-no_match_vars/;
|
||||
|
||||
use CPAN::SQLite::Info;
|
||||
use CPAN::SQLite::State;
|
||||
use CPAN::SQLite::Populate;
|
||||
use CPAN::SQLite::DBI qw($tables);
|
||||
use File::Spec::Functions qw(catfile);
|
||||
use File::Basename;
|
||||
use File::Path;
|
||||
use HTTP::Tiny;
|
||||
|
||||
use Scalar::Util 'weaken';
|
||||
|
||||
unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
|
||||
$ENV{CPAN_SQLITE_DEBUG} = 1;
|
||||
}
|
||||
|
||||
our ($oldout);
|
||||
my $log_file = 'cpan_sqlite_log.' . time;
|
||||
|
||||
# This is usually already defined in real life, but tests need it to be set
|
||||
$CPAN::FrontEnd ||= "CPAN::Shell";
|
||||
|
||||
sub new {
|
||||
my ($class, %args) = @_;
|
||||
if ($args{setup} and $args{reindex}) {
|
||||
die "Reindexing must be done on an exisiting database";
|
||||
}
|
||||
|
||||
my $self = { index => undef, state => undef, %args };
|
||||
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
sub download_index {
|
||||
my $self = shift;
|
||||
|
||||
if ($ENV{'CPAN_SQLITE_DOWNLOAD'}) {
|
||||
$ENV{'CPAN_SQLITE_DOWNLOAD_URL'} = 'http://cpansqlite.trouchelle.com/' unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
|
||||
}
|
||||
|
||||
return 0 unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
|
||||
|
||||
$CPAN::FrontEnd->myprint("Downloading the compiled index db ... ");
|
||||
|
||||
if (my $response =
|
||||
HTTP::Tiny->new->mirror($ENV{'CPAN_SQLITE_DOWNLOAD_URL'} => catfile($self->{'db_dir'}, $self->{'db_name'})))
|
||||
{
|
||||
if ($response->{'success'} and $response->{'status'} and $response->{'status'} eq '200') {
|
||||
if (my $type = $response->{'headers'}->{'content-type'}) {
|
||||
if ($type eq 'application/x-sqlite3') {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$CPAN::FrontEnd->mywarn('Cannot download the compiled index db');
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub index {
|
||||
my $self = shift;
|
||||
my $setup = $self->{'setup'};
|
||||
|
||||
if ($setup) {
|
||||
my $db_name = catfile($self->{'db_dir'}, $self->{db_name});
|
||||
if (-f $db_name) {
|
||||
$CPAN::FrontEnd->myprint("Removing existing $db_name ... ");
|
||||
if (unlink $db_name) {
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
} else {
|
||||
$CPAN::FrontEnd->mywarn("Failed: $!\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
my $log = catfile($self->{'log_dir'}, $log_file);
|
||||
|
||||
unless ($ENV{'CPAN_SQLITE_NO_LOG_FILES'}) {
|
||||
$oldout = error_fh($log);
|
||||
}
|
||||
|
||||
my $log_cleanup = $ENV{'CPAN_SQLITE_LOG_FILES_CLEANUP'};
|
||||
$log_cleanup = 30 unless defined $log_cleanup;
|
||||
if ($log_cleanup and $log_cleanup =~ /^\d+$/) {
|
||||
if (opendir(my $DIR, $self->{'log_dir'})) {
|
||||
my @files = grep { /cpan_sqlite_log\./ } readdir $DIR;
|
||||
closedir $DIR;
|
||||
|
||||
@files = grep { -C $_ > $log_cleanup } map { catfile($self->{'log_dir'}, $_) } @files;
|
||||
|
||||
if (@files) {
|
||||
$CPAN::FrontEnd->myprint('Cleaning old log files ... ');
|
||||
unlink @files;
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($self->download_index()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ($self->{'update_indices'}) {
|
||||
$CPAN::FrontEnd->myprint('Fetching index files ... ');
|
||||
if ($self->fetch_cpan_indices()) {
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
} else {
|
||||
$CPAN::FrontEnd->mywarn("Failed\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
$CPAN::FrontEnd->myprint('Gathering information from index files ... ');
|
||||
if ($self->fetch_info()) {
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
} else {
|
||||
$CPAN::FrontEnd->mywarn("Failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
unless ($setup) {
|
||||
$CPAN::FrontEnd->myprint('Obtaining current state of database ... ');
|
||||
if ($self->state()) {
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
} else {
|
||||
$CPAN::FrontEnd->mywarn("Failed\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
$CPAN::FrontEnd->myprint('Populating database tables ... ');
|
||||
if ($self->populate()) {
|
||||
$CPAN::FrontEnd->myprint("Done.\n");
|
||||
} else {
|
||||
$CPAN::FrontEnd->mywarn("Failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub fetch_cpan_indices {
|
||||
my $self = shift;
|
||||
|
||||
my $CPAN = $self->{CPAN};
|
||||
my $indices = {
|
||||
'01mailrc.txt.gz' => 'authors',
|
||||
'02packages.details.txt.gz' => 'modules',
|
||||
};
|
||||
|
||||
foreach my $index (keys %$indices) {
|
||||
my $file = catfile($CPAN, $indices->{$index}, $index);
|
||||
next if (-e $file and -M $file < 1);
|
||||
my $dir = dirname($file);
|
||||
unless (-d $dir) {
|
||||
mkpath($dir, 0, oct(755)) or die "Cannot mkpath $dir: $!";
|
||||
}
|
||||
my @urllist = @{ $self->{urllist} };
|
||||
foreach my $cpan (@urllist) {
|
||||
my $from = join '/', ($cpan, $indices->{$index}, $index);
|
||||
if (my $response = HTTP::Tiny->new->get($from)) {
|
||||
if ($response->{'success'}) {
|
||||
if (open(my $FILE, '>', $file)) {
|
||||
binmode $FILE;
|
||||
print $FILE $response->{'content'};
|
||||
if (close($FILE)) {
|
||||
next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
unless (-f $file) {
|
||||
$CPAN::FrontEnd->mywarn("Cannot retrieve '$file'");
|
||||
return;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub fetch_info {
|
||||
my $self = shift;
|
||||
my %wanted = map { $_ => $self->{$_} } qw(CPAN ignore keep_source_where);
|
||||
my $info = CPAN::SQLite::Info->new(%wanted);
|
||||
$info->fetch_info() or return;
|
||||
my @tables = qw(dists mods auths info);
|
||||
my $index;
|
||||
foreach my $table (@tables) {
|
||||
my $class = __PACKAGE__ . '::' . $table;
|
||||
my $this = { info => $info->{$table} };
|
||||
$index->{$table} = bless $this, $class;
|
||||
}
|
||||
$self->{index} = $index;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub state {
|
||||
my $self = shift;
|
||||
|
||||
my %wanted = map { $_ => $self->{$_} } qw(db_name index setup reindex db_dir);
|
||||
my $state = CPAN::SQLite::State->new(%wanted);
|
||||
$state->state() or return;
|
||||
$self->{state} = $state;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub populate {
|
||||
my $self = shift;
|
||||
my %wanted = map { $_ => $self->{$_} } qw(db_name index setup state db_dir);
|
||||
my $db = CPAN::SQLite::Populate->new(%wanted);
|
||||
$db->populate() or return;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub error_fh {
|
||||
my $file = shift;
|
||||
open(my $tmp, '>', $file) or die "Cannot open $file: $!";
|
||||
close $tmp;
|
||||
|
||||
# Should be open(my $oldout, '>&', \*STDOUT); but it fails on 5.6.2
|
||||
open(my $oldout, '>&STDOUT');
|
||||
open(STDOUT, '>', $file) or die "Cannot tie STDOUT to $file: $!";
|
||||
select STDOUT;
|
||||
$| = 1;
|
||||
return $oldout;
|
||||
}
|
||||
|
||||
sub DESTROY {
|
||||
unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
|
||||
close STDOUT;
|
||||
open(STDOUT, '>&', $oldout) if $oldout;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
CPAN::SQLite::Index - set up or update database tables.
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
version 0.219
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
my $index = CPAN::SQLite::Index->new(setup => 1);
|
||||
$index->index();
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This is the main module used to set up or update the
|
||||
database tables used to store information from the
|
||||
CPAN and ppm indices. The creation of the object
|
||||
|
||||
my $index = CPAN::SQLite::Index->new(%args);
|
||||
|
||||
accepts two possible arguments:
|
||||
|
||||
=over 3
|
||||
|
||||
=item * setup =E<gt> 1
|
||||
|
||||
This (optional) argument specifies that the database is being set up.
|
||||
Any existing tables will be dropped.
|
||||
|
||||
=item * reindex =E<gt> value
|
||||
|
||||
This (optional) argument specifies distribution names that
|
||||
one would like to reindex in an existing database. These may
|
||||
be specified as either a scalar, for a single distribution,
|
||||
or as an array reference for a list of distributions.
|
||||
|
||||
=back
|
||||
|
||||
=head1 DETAILS
|
||||
|
||||
Calling
|
||||
|
||||
$index->index();
|
||||
|
||||
will start the indexing procedure. Various messages
|
||||
detailing the progress will written to I<STDOUT>,
|
||||
which by default will be captured into a file
|
||||
F<cpan_sqlite_log.dddddddddd>, where the extension
|
||||
is the C<time> that the method was invoked. Error messages
|
||||
are not captured, and will appear in I<STDERR>.
|
||||
|
||||
The steps of the indexing procedure are as follows.
|
||||
|
||||
=over 4
|
||||
|
||||
=item * download existing pre-compiled index (optional)
|
||||
|
||||
If CPAN_SQLITE_DOWNLOAD or CPAN_SQLITE_DOWNLOAD_URL variables are set, an
|
||||
already existing and up-to-date cpandb.sql file will be downloaded from
|
||||
either specified URL or http://cpansqlite.trouchelle.com/ where it's
|
||||
updated every hour. This greatly increases performance and decreases CPU
|
||||
and memory consumption during the indexing process but if your CPAN
|
||||
mirror is out-of-sync or you're using DarkPAN, it obviously wouldn't
|
||||
work. It also wouldn't work without an internet connection.
|
||||
|
||||
See L<WWW::CPAN::SQLite> if you want to setup your own service for
|
||||
pre-compiling the database.
|
||||
|
||||
If neither variable is set, this step is skipped.
|
||||
|
||||
=item * fetch index data
|
||||
|
||||
The necessary CPAN index files
|
||||
F<$CPAN/authors/01mailrc.txt.gz> and
|
||||
F<$CPAN/modules/02packages.details.txt.gz> will be fetched
|
||||
from the CPAN mirror specified by the C<$cpan> variable
|
||||
at the beginning of L<CPAN::SQLite::Index>. If you are
|
||||
using this option, it is recommended to use the
|
||||
same CPAN mirror with subsequent updates, to ensure consistency
|
||||
of the database. As well, the information on the locations
|
||||
of the CPAN mirrors used for Template-Toolkit and GeoIP
|
||||
is written.
|
||||
|
||||
=item * get index information
|
||||
|
||||
Information from the CPAN indices is extracted through
|
||||
L<CPAN::SQLite::Info>.
|
||||
|
||||
=item * get state information
|
||||
|
||||
Unless the C<setup> argument within the C<new>
|
||||
method of L<CPAN::SQLite::Index> is specified,
|
||||
this will get information on the state of the database
|
||||
through L<CPAN::SQLite::State>.
|
||||
A comparison is then made between this information
|
||||
and that gathered from the CPAN indices, and if there's
|
||||
a discrepancy in some items, those items are marked
|
||||
for either insertion, updating, or deletion, as appropriate.
|
||||
|
||||
=item * populate the database
|
||||
|
||||
At this stage the gathered information is used to populate
|
||||
the database, through L<CPAN::SQLite::Populate>,
|
||||
either inserting new items, updating
|
||||
existing ones, or deleting obsolete items.
|
||||
|
||||
=back
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<CPAN::SQLite::Info>, L<CPAN::SQLite::State>,
|
||||
L<CPAN::SQLite::Populate>,
|
||||
and L<CPAN::SQLite::Util>.
|
||||
Development takes place on the CPAN-SQLite project
|
||||
at L<http://sourceforge.net/projects/cpan-search/>.
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Randy Kobes (passed away on September 18, 2010)
|
||||
|
||||
Serguei Trouchelle E<lt>stro@cpan.orgE<gt>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 2006 by Randy Kobes E<lt>r.kobes@uwinnipeg.caE<gt>.
|
||||
|
||||
Copyright 2011 by Serguei Trouchelle E<lt>stro@cpan.orgE<gt>.
|
||||
|
||||
Use and redistribution are under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
Reference in New Issue
Block a user