380 lines
9.7 KiB
Perl
380 lines
9.7 KiB
Perl
# $Id: Index.pm 84 2020-05-31 06:29:34Z stro $
|
|
|
|
package CPAN::SQLite::Index;
|
|
use strict;
|
|
use warnings;
|
|
|
|
our $VERSION = '0.219';
|
|
|
|
use English qw/-no_match_vars/;
|
|
|
|
use CPAN::SQLite::Info;
|
|
use CPAN::SQLite::State;
|
|
use CPAN::SQLite::Populate;
|
|
use CPAN::SQLite::DBI qw($tables);
|
|
use File::Spec::Functions qw(catfile);
|
|
use File::Basename;
|
|
use File::Path;
|
|
use HTTP::Tiny;
|
|
|
|
use Scalar::Util 'weaken';
|
|
|
|
unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
|
|
$ENV{CPAN_SQLITE_DEBUG} = 1;
|
|
}
|
|
|
|
our ($oldout);
|
|
my $log_file = 'cpan_sqlite_log.' . time;
|
|
|
|
# This is usually already defined in real life, but tests need it to be set
|
|
$CPAN::FrontEnd ||= "CPAN::Shell";
|
|
|
|
sub new {
|
|
my ($class, %args) = @_;
|
|
if ($args{setup} and $args{reindex}) {
|
|
die "Reindexing must be done on an exisiting database";
|
|
}
|
|
|
|
my $self = { index => undef, state => undef, %args };
|
|
|
|
return bless $self, $class;
|
|
}
|
|
|
|
sub download_index {
|
|
my $self = shift;
|
|
|
|
if ($ENV{'CPAN_SQLITE_DOWNLOAD'}) {
|
|
$ENV{'CPAN_SQLITE_DOWNLOAD_URL'} = 'http://cpansqlite.trouchelle.com/' unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
|
|
}
|
|
|
|
return 0 unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
|
|
|
|
$CPAN::FrontEnd->myprint("Downloading the compiled index db ... ");
|
|
|
|
if (my $response =
|
|
HTTP::Tiny->new->mirror($ENV{'CPAN_SQLITE_DOWNLOAD_URL'} => catfile($self->{'db_dir'}, $self->{'db_name'})))
|
|
{
|
|
if ($response->{'success'} and $response->{'status'} and $response->{'status'} eq '200') {
|
|
if (my $type = $response->{'headers'}->{'content-type'}) {
|
|
if ($type eq 'application/x-sqlite3') {
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
$CPAN::FrontEnd->mywarn('Cannot download the compiled index db');
|
|
return 0;
|
|
}
|
|
|
|
sub index {
|
|
my $self = shift;
|
|
my $setup = $self->{'setup'};
|
|
|
|
if ($setup) {
|
|
my $db_name = catfile($self->{'db_dir'}, $self->{db_name});
|
|
if (-f $db_name) {
|
|
$CPAN::FrontEnd->myprint("Removing existing $db_name ... ");
|
|
if (unlink $db_name) {
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
} else {
|
|
$CPAN::FrontEnd->mywarn("Failed: $!\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
my $log = catfile($self->{'log_dir'}, $log_file);
|
|
|
|
unless ($ENV{'CPAN_SQLITE_NO_LOG_FILES'}) {
|
|
$oldout = error_fh($log);
|
|
}
|
|
|
|
my $log_cleanup = $ENV{'CPAN_SQLITE_LOG_FILES_CLEANUP'};
|
|
$log_cleanup = 30 unless defined $log_cleanup;
|
|
if ($log_cleanup and $log_cleanup =~ /^\d+$/) {
|
|
if (opendir(my $DIR, $self->{'log_dir'})) {
|
|
my @files = grep { /cpan_sqlite_log\./ } readdir $DIR;
|
|
closedir $DIR;
|
|
|
|
@files = grep { -C $_ > $log_cleanup } map { catfile($self->{'log_dir'}, $_) } @files;
|
|
|
|
if (@files) {
|
|
$CPAN::FrontEnd->myprint('Cleaning old log files ... ');
|
|
unlink @files;
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($self->download_index()) {
|
|
return 1;
|
|
}
|
|
|
|
if ($self->{'update_indices'}) {
|
|
$CPAN::FrontEnd->myprint('Fetching index files ... ');
|
|
if ($self->fetch_cpan_indices()) {
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
} else {
|
|
$CPAN::FrontEnd->mywarn("Failed\n");
|
|
return;
|
|
}
|
|
}
|
|
|
|
$CPAN::FrontEnd->myprint('Gathering information from index files ... ');
|
|
if ($self->fetch_info()) {
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
} else {
|
|
$CPAN::FrontEnd->mywarn("Failed\n");
|
|
return;
|
|
}
|
|
|
|
unless ($setup) {
|
|
$CPAN::FrontEnd->myprint('Obtaining current state of database ... ');
|
|
if ($self->state()) {
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
} else {
|
|
$CPAN::FrontEnd->mywarn("Failed\n");
|
|
return;
|
|
}
|
|
}
|
|
|
|
$CPAN::FrontEnd->myprint('Populating database tables ... ');
|
|
if ($self->populate()) {
|
|
$CPAN::FrontEnd->myprint("Done.\n");
|
|
} else {
|
|
$CPAN::FrontEnd->mywarn("Failed\n");
|
|
return;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
sub fetch_cpan_indices {
|
|
my $self = shift;
|
|
|
|
my $CPAN = $self->{CPAN};
|
|
my $indices = {
|
|
'01mailrc.txt.gz' => 'authors',
|
|
'02packages.details.txt.gz' => 'modules',
|
|
};
|
|
|
|
foreach my $index (keys %$indices) {
|
|
my $file = catfile($CPAN, $indices->{$index}, $index);
|
|
next if (-e $file and -M $file < 1);
|
|
my $dir = dirname($file);
|
|
unless (-d $dir) {
|
|
mkpath($dir, 0, oct(755)) or die "Cannot mkpath $dir: $!";
|
|
}
|
|
my @urllist = @{ $self->{urllist} };
|
|
foreach my $cpan (@urllist) {
|
|
my $from = join '/', ($cpan, $indices->{$index}, $index);
|
|
if (my $response = HTTP::Tiny->new->get($from)) {
|
|
if ($response->{'success'}) {
|
|
if (open(my $FILE, '>', $file)) {
|
|
binmode $FILE;
|
|
print $FILE $response->{'content'};
|
|
if (close($FILE)) {
|
|
next;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
unless (-f $file) {
|
|
$CPAN::FrontEnd->mywarn("Cannot retrieve '$file'");
|
|
return;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub fetch_info {
|
|
my $self = shift;
|
|
my %wanted = map { $_ => $self->{$_} } qw(CPAN ignore keep_source_where);
|
|
my $info = CPAN::SQLite::Info->new(%wanted);
|
|
$info->fetch_info() or return;
|
|
my @tables = qw(dists mods auths info);
|
|
my $index;
|
|
foreach my $table (@tables) {
|
|
my $class = __PACKAGE__ . '::' . $table;
|
|
my $this = { info => $info->{$table} };
|
|
$index->{$table} = bless $this, $class;
|
|
}
|
|
$self->{index} = $index;
|
|
return 1;
|
|
}
|
|
|
|
sub state {
|
|
my $self = shift;
|
|
|
|
my %wanted = map { $_ => $self->{$_} } qw(db_name index setup reindex db_dir);
|
|
my $state = CPAN::SQLite::State->new(%wanted);
|
|
$state->state() or return;
|
|
$self->{state} = $state;
|
|
return 1;
|
|
}
|
|
|
|
sub populate {
|
|
my $self = shift;
|
|
my %wanted = map { $_ => $self->{$_} } qw(db_name index setup state db_dir);
|
|
my $db = CPAN::SQLite::Populate->new(%wanted);
|
|
$db->populate() or return;
|
|
return 1;
|
|
}
|
|
|
|
sub error_fh {
|
|
my $file = shift;
|
|
open(my $tmp, '>', $file) or die "Cannot open $file: $!";
|
|
close $tmp;
|
|
|
|
# Should be open(my $oldout, '>&', \*STDOUT); but it fails on 5.6.2
|
|
open(my $oldout, '>&STDOUT');
|
|
open(STDOUT, '>', $file) or die "Cannot tie STDOUT to $file: $!";
|
|
select STDOUT;
|
|
$| = 1;
|
|
return $oldout;
|
|
}
|
|
|
|
sub DESTROY {
|
|
unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
|
|
close STDOUT;
|
|
open(STDOUT, '>&', $oldout) if $oldout;
|
|
}
|
|
return;
|
|
}
|
|
|
|
1;
|
|
|
|
=head1 NAME
|
|
|
|
CPAN::SQLite::Index - set up or update database tables.
|
|
|
|
=head1 VERSION
|
|
|
|
version 0.219
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
my $index = CPAN::SQLite::Index->new(setup => 1);
|
|
$index->index();
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is the main module used to set up or update the
|
|
database tables used to store information from the
|
|
CPAN and ppm indices. The creation of the object
|
|
|
|
my $index = CPAN::SQLite::Index->new(%args);
|
|
|
|
accepts two possible arguments:
|
|
|
|
=over 3
|
|
|
|
=item * setup =E<gt> 1
|
|
|
|
This (optional) argument specifies that the database is being set up.
|
|
Any existing tables will be dropped.
|
|
|
|
=item * reindex =E<gt> value
|
|
|
|
This (optional) argument specifies distribution names that
|
|
one would like to reindex in an existing database. These may
|
|
be specified as either a scalar, for a single distribution,
|
|
or as an array reference for a list of distributions.
|
|
|
|
=back
|
|
|
|
=head1 DETAILS
|
|
|
|
Calling
|
|
|
|
$index->index();
|
|
|
|
will start the indexing procedure. Various messages
|
|
detailing the progress will written to I<STDOUT>,
|
|
which by default will be captured into a file
|
|
F<cpan_sqlite_log.dddddddddd>, where the extension
|
|
is the C<time> that the method was invoked. Error messages
|
|
are not captured, and will appear in I<STDERR>.
|
|
|
|
The steps of the indexing procedure are as follows.
|
|
|
|
=over 4
|
|
|
|
=item * download existing pre-compiled index (optional)
|
|
|
|
If CPAN_SQLITE_DOWNLOAD or CPAN_SQLITE_DOWNLOAD_URL variables are set, an
|
|
already existing and up-to-date cpandb.sql file will be downloaded from
|
|
either specified URL or http://cpansqlite.trouchelle.com/ where it's
|
|
updated every hour. This greatly increases performance and decreases CPU
|
|
and memory consumption during the indexing process but if your CPAN
|
|
mirror is out-of-sync or you're using DarkPAN, it obviously wouldn't
|
|
work. It also wouldn't work without an internet connection.
|
|
|
|
See L<WWW::CPAN::SQLite> if you want to setup your own service for
|
|
pre-compiling the database.
|
|
|
|
If neither variable is set, this step is skipped.
|
|
|
|
=item * fetch index data
|
|
|
|
The necessary CPAN index files
|
|
F<$CPAN/authors/01mailrc.txt.gz> and
|
|
F<$CPAN/modules/02packages.details.txt.gz> will be fetched
|
|
from the CPAN mirror specified by the C<$cpan> variable
|
|
at the beginning of L<CPAN::SQLite::Index>. If you are
|
|
using this option, it is recommended to use the
|
|
same CPAN mirror with subsequent updates, to ensure consistency
|
|
of the database. As well, the information on the locations
|
|
of the CPAN mirrors used for Template-Toolkit and GeoIP
|
|
is written.
|
|
|
|
=item * get index information
|
|
|
|
Information from the CPAN indices is extracted through
|
|
L<CPAN::SQLite::Info>.
|
|
|
|
=item * get state information
|
|
|
|
Unless the C<setup> argument within the C<new>
|
|
method of L<CPAN::SQLite::Index> is specified,
|
|
this will get information on the state of the database
|
|
through L<CPAN::SQLite::State>.
|
|
A comparison is then made between this information
|
|
and that gathered from the CPAN indices, and if there's
|
|
a discrepancy in some items, those items are marked
|
|
for either insertion, updating, or deletion, as appropriate.
|
|
|
|
=item * populate the database
|
|
|
|
At this stage the gathered information is used to populate
|
|
the database, through L<CPAN::SQLite::Populate>,
|
|
either inserting new items, updating
|
|
existing ones, or deleting obsolete items.
|
|
|
|
=back
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<CPAN::SQLite::Info>, L<CPAN::SQLite::State>,
|
|
L<CPAN::SQLite::Populate>,
|
|
and L<CPAN::SQLite::Util>.
|
|
Development takes place on the CPAN-SQLite project
|
|
at L<http://sourceforge.net/projects/cpan-search/>.
|
|
|
|
=head1 AUTHORS
|
|
|
|
Randy Kobes (passed away on September 18, 2010)
|
|
|
|
Serguei Trouchelle E<lt>stro@cpan.orgE<gt>
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright 2006 by Randy Kobes E<lt>r.kobes@uwinnipeg.caE<gt>.
|
|
|
|
Copyright 2011 by Serguei Trouchelle E<lt>stro@cpan.orgE<gt>.
|
|
|
|
Use and redistribution are under the same terms as Perl itself.
|
|
|
|
=cut
|