Initial Commit

This commit is contained in:
Riley Schneider
2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions

View File

@@ -0,0 +1,427 @@
package DBM::Deep::Array;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
no warnings 'recursion';
# This is to allow DBM::Deep::Array to handle negative indices on
# its own. Otherwise, Perl would intercept the call to negative
# indices for us. This was causing bugs for negative index handling.
our $NEGATIVE_INDICES = 1;
use base 'DBM::Deep';
use Scalar::Util ();
sub _get_self {
    # Return the real object behind $_[0].  If $_[0] is a tied array, the
    # tied() object is what we want; otherwise $_[0] already is the object.
    # (An eval/$SIG{__DIE__}-based version was used before, but it
    # misbehaved during global destruction (DBM::Deep's destructor calls
    # this method): it could return $_[0] even when $_[0] was tied -- tied
    # to undef.  In that case it is better to return undef so the
    # destructor knows to do nothing, and anything else calling us fails
    # with a more helpful error message.)
    my ($thing) = @_;
    return Scalar::Util::reftype($thing) eq 'ARRAY' ? tied @$thing : $thing;
}
sub _repr { [] }
# Constructor invoked by Perl's tie() for arrays.  Normalizes the caller's
# arguments via _get_args, forces the type to TYPE_ARRAY, and hands off to
# the shared _init constructor in DBM::Deep.
sub TIEARRAY {
    my $class = shift;
    my $args = $class->_get_args( @_ );

    $args->{type} = $class->TYPE_ARRAY;

    return $class->_init($args);
}
# Tied-array read.  Valid keys are:
#   * integers -- negative ones are resolved against the current size,
#     returning nothing when they fall before element 0; and
#   * the literal string 'length', the internal key where the array size
#     is stored (see FETCHSIZE).
# Anything else throws.  A shared lock is held around the whole operation.
sub FETCH {
    my $self = shift->_get_self;
    my ($key) = @_;

    $self->lock_shared;

    if ( !defined $key ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use an undefined array index." );
    }
    elsif ( $key =~ /^-?\d+$/ ) {
        if ( $key < 0 ) {
            $key += $self->FETCHSIZE;
            unless ( $key >= 0 ) {
                # Negative index beyond the front -- nothing there.
                $self->unlock;
                return;
            }
        }
    }
    elsif ( $key ne 'length' ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use '$key' as an array index." );
    }

    my $rv = $self->SUPER::FETCH( $key );

    $self->unlock;

    return $rv;
}
# Tied-array write.  Index validation mirrors FETCH (integers or the
# internal 'length' key), except that storing to an index more negative
# than -size is a hard error, matching Perl's behavior for plain arrays.
# Storing at or past the current end grows the array.  An exclusive lock
# is held around the whole operation.
sub STORE {
    my $self = shift->_get_self;
    my ($key, $value) = @_;

    $self->lock_exclusive;

    my $size;
    my $idx_is_numeric;

    if ( !defined $key ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use an undefined array index." );
    }
    elsif ( $key =~ /^-?\d+$/ ) {
        $idx_is_numeric = 1;
        if ( $key < 0 ) {
            $size = $self->FETCHSIZE;
            if ( $key + $size < 0 ) {
                die( "Modification of non-creatable array value attempted, subscript $key" );
            }
            $key += $size
        }
    }
    elsif ( $key ne 'length' ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use '$key' as an array index." );
    }

    my $rv = $self->SUPER::STORE( $key, $value );

    # Grow the stored size if we wrote at or beyond the old end.
    if ( $idx_is_numeric ) {
        $size = $self->FETCHSIZE unless defined $size;
        if ( $key >= $size ) {
            $self->STORESIZE( $key + 1 );
        }
    }

    $self->unlock;

    return $rv;
}
# Tied-array exists().  Index validation mirrors FETCH; a negative index
# beyond the front simply reports non-existence.  A shared lock is held
# around the whole operation.
sub EXISTS {
    my $self = shift->_get_self;
    my ($key) = @_;

    $self->lock_shared;

    if ( !defined $key ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use an undefined array index." );
    }
    elsif ( $key =~ /^-?\d+$/ ) {
        if ( $key < 0 ) {
            $key += $self->FETCHSIZE;
            unless ( $key >= 0 ) {
                $self->unlock;
                return;
            }
        }
    }
    elsif ( $key ne 'length' ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use '$key' as an array index." );
    }

    my $rv = $self->SUPER::EXISTS( $key );

    $self->unlock;

    return $rv;
}
# Tied-array delete().  Index validation mirrors FETCH.  Deleting the
# final element additionally shrinks the stored size, like delete() on
# the last index of a plain Perl array.  An exclusive lock is held
# around the whole operation.
sub DELETE {
    my $self = shift->_get_self;
    my ($key) = @_;
    warn "ARRAY::DELETE($self,$key)\n" if DBM::Deep::DEBUG;

    $self->lock_exclusive;

    my $size = $self->FETCHSIZE;
    if ( !defined $key ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use an undefined array index." );
    }
    elsif ( $key =~ /^-?\d+$/ ) {
        if ( $key < 0 ) {
            $key += $size;
            unless ( $key >= 0 ) {
                $self->unlock;
                return;
            }
        }
    }
    elsif ( $key ne 'length' ) {
        $self->unlock;
        DBM::Deep->_throw_error( "Cannot use '$key' as an array index." );
    }

    my $rv = $self->SUPER::DELETE( $key );

    # NOTE(review): $key is compared numerically here; if $key were the
    # literal string 'length' and $rv true, this would warn (fatally,
    # under FATAL warnings) -- presumably DELETE('length') never happens;
    # confirm.
    if ($rv && $key == $size - 1) {
        $self->STORESIZE( $key );
    }

    $self->unlock;

    return $rv;
}
# Now that we have a real Reference sector, we should store the array size
# there. However, the array size needs to be transactionally-aware, so a
# simple location to store it isn't going to work.
# Tied-array size.  The size lives under the internal 'length' key, and
# the fetch-value filter must NOT be applied to it, so the filter is
# disabled for the duration of the FETCH.  Using local() (instead of the
# previous save/restore-by-hand) guarantees the filter is reinstated even
# if FETCH throws.
sub FETCHSIZE {
    my $self = shift->_get_self;

    $self->lock_shared;

    my $size = do {
        local $self->_engine->storage->{filter_fetch_value} = undef;
        $self->FETCH('length') || 0;
    };

    $self->unlock;

    return $size;
}
# Set the tied-array size, stored under the internal 'length' key.  The
# store-value filter must NOT be applied to it, so the filter is disabled
# for the duration of the STORE.  local() guarantees the filter is
# reinstated even if STORE throws (the old hand-rolled save/restore did
# not).
sub STORESIZE {
    my $self = shift->_get_self;
    my ($new_length) = @_;

    $self->lock_exclusive;

    my $result = do {
        local $self->_engine->storage->{filter_store_value} = undef;
        $self->STORE( 'length', $new_length, 'length' );
    };

    $self->unlock;

    return $result;
}
# Remove and return the last element; returns nothing when the array is
# empty.  Holds an exclusive lock for the whole fetch-then-delete.
sub POP {
    my $self = shift->_get_self;

    $self->lock_exclusive;

    my $size = $self->FETCHSIZE();
    unless ($size) {
        $self->unlock;
        return;
    }

    my $last = $self->FETCH( $size - 1 );
    $self->DELETE( $size - 1 );

    $self->unlock;

    return $last;
}
# Append the given values, returning the new array length (like
# CORE::push).  Holds an exclusive lock across all the STOREs.
sub PUSH {
    my $self = shift->_get_self;

    $self->lock_exclusive;

    my $next = $self->FETCHSIZE();
    $self->STORE( $next++, $_ ) for @_;

    $self->unlock;

    return $next;
}
# XXX This really needs to be something more direct within the file, not a
# fetch and re-store. -RobK, 2007-09-20
sub _move_value {
my $self = shift;
my ($old_key, $new_key) = @_;
return $self->_engine->make_reference( $self, $old_key, $new_key );
}
# Remove and return the first element; returns nothing when the array is
# empty.  Remaining elements slide down one slot and the duplicated final
# slot is dropped.  Holds an exclusive lock throughout.
sub SHIFT {
    my $self = shift->_get_self;
    warn "SHIFT($self)\n" if DBM::Deep::DEBUG;

    $self->lock_exclusive;

    my $length = $self->FETCHSIZE();
    unless ($length) {
        $self->unlock;
        return;
    }

    my $content = $self->DELETE( 0 );

    # Unless the deletion above has cleared the array ...
    if ( $length > 1 ) {
        $self->_move_value( $_ + 1, $_ ) for 0 .. $length - 2;
        $self->DELETE( $length - 1 );
    }

    $self->unlock;

    return $content;
}
# Prepend the given values, returning the new array length (like
# CORE::unshift).  Existing elements are slid up (back to front) to make
# room, then the new values are stored into the vacated slots.  Holds an
# exclusive lock throughout.
sub UNSHIFT {
    my $self = shift->_get_self;
    my @prepend = @_;

    $self->lock_exclusive;

    my $length   = $self->FETCHSIZE();
    my $how_many = scalar @prepend;

    if ($length) {
        $self->_move_value( $_, $_ + $how_many ) for reverse 0 .. $length - 1;
        $self->STORESIZE( $length + $how_many );
    }

    $self->STORE( $_, $prepend[$_] ) for 0 .. $how_many - 1;

    $self->unlock;

    return $length + $how_many;
}
# Tied-array splice(), following CORE::splice semantics: optional offset
# (negative counts from the end), optional length (negative counts back
# from the end; defaults to "everything from offset on"), then a list of
# replacement elements.  Returns the removed elements (last removed
# element in scalar context).  Holds an exclusive lock throughout.
sub SPLICE {
    my $self = shift->_get_self;

    $self->lock_exclusive;

    my $length = $self->FETCHSIZE();

    ##
    # Calculate offset and length of splice
    ##
    my $offset = shift;
    $offset = 0 unless defined $offset;
    if ($offset < 0) { $offset += $length; }

    my $splice_length;
    if (scalar @_) { $splice_length = shift; }
    else { $splice_length = $length - $offset; }
    if ($splice_length < 0) { $splice_length += ($length - $offset); }

    ##
    # Setup array with new elements, and copy out old elements for return
    ##
    my @new_elements = @_;
    my $new_size = scalar @new_elements;

    my @old_elements = map {
        $self->FETCH( $_ )
    } $offset .. ($offset + $splice_length - 1);

    ##
    # Adjust array length, and shift elements to accommodate new section.
    ##
    if ( $new_size != $splice_length ) {
        if ($new_size > $splice_length) {
            # Growing: slide the tail upward (back to front) to open a gap.
            for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) {
                $self->_move_value( $i, $i + ($new_size - $splice_length) );
            }
            $self->STORESIZE( $length + $new_size - $splice_length );
        }
        else {
            # Shrinking: slide the tail downward, then trim the leftovers
            # off the end.
            for (my $i = $offset + $splice_length; $i < $length; $i++) {
                $self->_move_value( $i, $i + ($new_size - $splice_length) );
            }
            for (my $i = 0; $i < $splice_length - $new_size; $i++) {
                $self->DELETE( $length - 1 );
                $length--;
            }
        }
    }

    ##
    # Insert new elements into array
    ##
    for (my $i = $offset; $i < $offset + $new_size; $i++) {
        $self->STORE( $i, shift @new_elements );
    }

    $self->unlock;

    ##
    # Return deleted section, or last element in scalar context.
    ##
    return wantarray ? @old_elements : $old_elements[-1];
}
# Perl calls EXTEND() when the array is likely to grow, as a hint so the
# implementation can preallocate.  We don't act on the hint, but the
# method must exist because it does get called at times.
# (We don't need to populate it yet; it may become useful when HASH and
# ARRAY handling are split out.)
sub EXTEND {}
# Copy every element of this array into the plain arrayref $db_temp,
# delegating per-element (deep) copying to _copy_value.  Always returns 1.
sub _copy_node {
    my $self = shift;
    my ($db_temp) = @_;

    my $length = $self->length();
    for (my $index = 0; $index < $length; $index++) {
        $self->_copy_value( \$db_temp->[$index], $self->get($index) );
    }

    return 1;
}
# Delete every element directly through the engine and reset the stored
# size to 0.  Returns nothing.
# NOTE(review): no locking is taken here -- presumably the caller already
# holds the lock; confirm against the call sites.
sub _clear {
    my $self = shift;

    my $size = $self->FETCHSIZE;
    for my $key ( 0 .. $size - 1 ) {
        $self->_engine->delete_key( $self, $key, $key );
    }
    $self->STORESIZE( 0 );

    return;
}
# Lowercase OO aliases for the tie methods, so $db->push(...), $db->pop,
# etc. work on the object directly.  These deliberately shadow the Perl
# builtins of the same name within this package.
sub length { (shift)->FETCHSIZE(@_) }
sub pop { (shift)->POP(@_) }
sub push { (shift)->PUSH(@_) }
sub unshift { (shift)->UNSHIFT(@_) }
sub splice { (shift)->SPLICE(@_) }

# This must be last otherwise we have to qualify all other calls to shift
# as calls to CORE::shift
sub shift { (CORE::shift)->SHIFT(@_) }
1;
__END__

View File

@@ -0,0 +1,181 @@
package DBM::Deep::ConfigData;
use strict;
# Slurp the __DATA__ section of this very file -- a Data::Dumper-produced
# 'do{ my $x = [...]; $x; }' block -- and evaluate it to recover the three
# hashrefs: fixed config values, fixed features, and auto-detected features.
my $arrayref = eval do {local $/; <DATA>}
    or die "Couldn't load ConfigData data: $@";
close DATA;
my ($config, $features, $auto_features) = @$arrayref;
# Class-method accessors over the three data hashes loaded above
# ($_[0] is the package name, $_[1] the key).
sub config { $config->{$_[1]} }
sub set_config { $config->{$_[1]} = $_[2] }
sub set_feature { $features->{$_[1]} = 0+!!$_[2] } # Constrain to 1 or 0

# Names of auto-detected features that have not been pinned to a fixed
# value via set_feature().
sub auto_feature_names { sort grep !exists $features->{$_}, keys %$auto_features }

# All feature names: the fixed ones followed by the auto-detected ones.
sub feature_names {
    my @features = (sort keys %$features, auto_feature_names());
    @features;
}

sub config_names { sort keys %$config }
# Persist the live $config/$features/$auto_features back into this very
# module file: everything after the __DATA__ token is rewritten with a
# fresh Data::Dumper serialization.
sub write {
    my $me = __FILE__;

    # Can't use Module::Build::Dumper here because M::B is only a
    # build-time prereq of this module
    require Data::Dumper;

    # The installed file may be read-only; temporarily add write
    # permission and restore the original mode afterwards.
    my $mode_orig = (stat $me)[2] & 07777;
    chmod($mode_orig | 0222, $me); # Make it writeable
    open(my $fh, '+<', $me) or die "Can't rewrite $me: $!";
    seek($fh, 0, 0);
    while (<$fh>) {
        last if /^__DATA__$/;
    }
    die "Couldn't find __DATA__ token in $me" if eof($fh);
    # Re-seek to clear the handle's read/EOF state before writing.
    seek($fh, tell($fh), 0);

    my $data = [$config, $features, $auto_features];
    # Purity(1) makes Dumper emit code that faithfully rebuilds shared
    # and self-referential structures.
    print($fh 'do{ my '
          . Data::Dumper->new([$data],['x'])->Purity(1)->Dump()
          . '$x; }' );
    truncate($fh, tell($fh));
    close $fh;

    chmod($mode_orig, $me)
        or warn "Couldn't restore permissions on $me: $!";
}
# Report whether the named feature is available.  Fixed features answer
# directly (1 or 0); auto-detected features are probed by checking their
# prerequisite (and conflicting) modules via Module::Build.  Returns 0,
# not undef, for unknown features.
sub feature {
    my ($package, $key) = @_;
    return $features->{$key} if exists $features->{$key};

    my $info = $auto_features->{$key} or return 0;

    require Module::Build; # XXX should get rid of this
    foreach my $type (sort keys %$info) {
        my $prereqs = $info->{$type};
        next if $type eq 'description' || $type eq 'recommends';

        foreach my $modname (sort keys %$prereqs) {
            my $status = Module::Build->check_installed_status($modname, $prereqs->{$modname});
            # A failed requirement -- or a *satisfied* conflict -- kills
            # the feature.
            if ((!$status->{ok}) xor ($type =~ /conflicts$/)) { return 0; }
            # Also make sure the module actually loads.
            if ( ! eval "require $modname; 1" ) { return 0; }
        }
    }
    return 1;
}
=head1 NAME
DBM::Deep::ConfigData - Configuration for DBM::Deep
=head1 SYNOPSIS
use DBM::Deep::ConfigData;
$value = DBM::Deep::ConfigData->config('foo');
$value = DBM::Deep::ConfigData->feature('bar');
@names = DBM::Deep::ConfigData->config_names;
@names = DBM::Deep::ConfigData->feature_names;
DBM::Deep::ConfigData->set_config(foo => $new_value);
DBM::Deep::ConfigData->set_feature(bar => $new_value);
DBM::Deep::ConfigData->write; # Save changes
=head1 DESCRIPTION
This module holds the configuration data for the C<DBM::Deep>
module. It also provides a programmatic interface for getting or
setting that configuration data. Note that in order to actually make
changes, you'll have to have write access to the C<DBM::Deep::ConfigData>
module, and you should attempt to understand the repercussions of your
actions.
=head1 METHODS
=over 4
=item config($name)
Given a string argument, returns the value of the configuration item
by that name, or C<undef> if no such item exists.
=item feature($name)
Given a string argument, returns the value of the feature by that
name, or a false value if no such feature exists.
=item set_config($name, $value)
Sets the configuration item with the given name to the given value.
The value may be any Perl scalar that will serialize correctly using
C<Data::Dumper>. This includes references, objects (usually), and
complex data structures. It probably does not include transient
things like filehandles or sockets.
=item set_feature($name, $value)
Sets the feature with the given name to the given boolean value. The
value will be converted to 0 or 1 automatically.
=item config_names()
Returns a list of all the names of config items currently defined in
C<DBM::Deep::ConfigData>, or in scalar context the number of items.
=item feature_names()
Returns a list of all the names of features currently defined in
C<DBM::Deep::ConfigData>, or in scalar context the number of features.
=item auto_feature_names()
Returns a list of all the names of features whose availability is
dynamically determined, or in scalar context the number of such
features. Does not include such features that have later been set to
a fixed value.
=item write()
Commits any changes from C<set_config()> and C<set_feature()> to disk.
Requires write access to the C<DBM::Deep::ConfigData> module.
=back
=head1 AUTHOR
C<DBM::Deep::ConfigData> was automatically created using C<Module::Build>.
C<Module::Build> was written by Ken Williams, but he holds no
authorship claim or copyright claim to the contents of C<DBM::Deep::ConfigData>.
=cut
__DATA__
do{ my $x = [
{},
{},
{
'mysql_engine' => {
'description' => 'DBI support via MySQL',
'requires' => {
'DBD::mysql' => '4.001',
'DBI' => '1.5'
}
},
'sqlite_engine' => {
'description' => 'DBI support via SQLite',
'requires' => {
'DBD::SQLite' => '1.25',
'DBI' => '1.5'
}
}
}
];
$x; }

View File

@@ -0,0 +1,215 @@
=head1 NAME
DBM::Deep::Cookbook - Cookbook for DBM::Deep
=head1 DESCRIPTION
This is the Cookbook for L<DBM::Deep>. It contains useful tips and tricks,
plus some examples of how to do common tasks.
=head1 RECIPES
=head2 Unicode data
If possible, it is highly recommended that you upgrade your database to
version 2 (using the F<utils/upgrade_db.pl> script in the CPAN
distribution), in order to use Unicode.
If your databases are still shared by perl installations with older
DBM::Deep versions, you can use filters to encode strings on the fly:
my $db = DBM::Deep->new( ... );
my $encode_sub = sub { my $s = shift; utf8::encode($s); $s };
my $decode_sub = sub { my $s = shift; utf8::decode($s); $s };
$db->set_filter( 'store_value' => $encode_sub );
$db->set_filter( 'fetch_value' => $decode_sub );
$db->set_filter( 'store_key' => $encode_sub );
$db->set_filter( 'fetch_key' => $decode_sub );
A previous version of this cookbook recommended using
C<binmode $db-E<gt>_fh, ":utf8">, but that is I<not> a good idea, as it
could easily corrupt the database.
=head2 Real-time Encryption Example
B<NOTE>: This is just an example of how to write a filter. This most
definitely should B<NOT> be taken as a proper way to write a filter that does
encryption. (Furthermore, it fails to take Unicode into account.)
Here is a working example that uses the I<Crypt::Blowfish> module to
do real-time encryption / decryption of keys & values with DBM::Deep Filters.
Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more
on I<Crypt::Blowfish>. You'll also need the I<Crypt::CBC> module.
use DBM::Deep;
use Crypt::Blowfish;
use Crypt::CBC;
my $cipher = Crypt::CBC->new({
'key' => 'my secret key',
'cipher' => 'Blowfish',
'iv' => '$KJh#(}q',
'regenerate_key' => 0,
'padding' => 'space',
'prepend_iv' => 0
});
my $db = DBM::Deep->new(
file => "foo-encrypt.db",
filter_store_key => \&my_encrypt,
filter_store_value => \&my_encrypt,
filter_fetch_key => \&my_decrypt,
filter_fetch_value => \&my_decrypt,
);
$db->{key1} = "value1";
$db->{key2} = "value2";
print "key1: " . $db->{key1} . "\n";
print "key2: " . $db->{key2} . "\n";
undef $db;
exit;
sub my_encrypt {
return $cipher->encrypt( $_[0] );
}
sub my_decrypt {
return $cipher->decrypt( $_[0] );
}
=head2 Real-time Compression Example
Here is a working example that uses the I<Compress::Zlib> module to do real-time
compression / decompression of keys & values with DBM::Deep Filters.
Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for
more on I<Compress::Zlib>.
use DBM::Deep;
use Compress::Zlib;
my $db = DBM::Deep->new(
file => "foo-compress.db",
filter_store_key => \&my_compress,
filter_store_value => \&my_compress,
filter_fetch_key => \&my_decompress,
filter_fetch_value => \&my_decompress,
);
$db->{key1} = "value1";
$db->{key2} = "value2";
print "key1: " . $db->{key1} . "\n";
print "key2: " . $db->{key2} . "\n";
undef $db;
exit;
sub my_compress {
my $s = shift;
utf8::encode($s);
return Compress::Zlib::memGzip( $s ) ;
}
sub my_decompress {
my $s = Compress::Zlib::memGunzip( shift ) ;
utf8::decode($s);
return $s;
}
B<Note:> Filtering of keys only applies to hashes. Array "keys" are
actually numerical index numbers, and are not filtered.
=head1 Custom Digest Algorithm
DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
keys. However you can override this, and use another algorithm (such as SHA-256)
or even write your own. But please note that DBM::Deep currently expects zero
collisions, so your algorithm has to be I<perfect>, so to speak. Collision
detection may be introduced in a later version.
You can specify a custom digest algorithm by passing it into the parameter
list for new(), passing a reference to a subroutine as the 'digest' parameter,
and the length of the algorithm's hashes (in bytes) as the 'hash_size'
parameter. Here is a working example that uses a 256-bit hash from the
I<Digest::SHA256> module. Please see
L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
The value passed to your digest function will be encoded as UTF-8 if the
database is in version 2 format or higher.
use DBM::Deep;
use Digest::SHA256;
my $context = Digest::SHA256::new(256);
my $db = DBM::Deep->new(
filename => "foo-sha.db",
digest => \&my_digest,
hash_size => 32,
);
$db->{key1} = "value1";
$db->{key2} = "value2";
print "key1: " . $db->{key1} . "\n";
print "key2: " . $db->{key2} . "\n";
undef $db;
exit;
sub my_digest {
return substr( $context->hash($_[0]), 0, 32 );
}
B<Note:> Your returned digest strings must be B<EXACTLY> the number
of bytes you specify in the hash_size parameter (in this case 32). Undefined
behavior will occur otherwise.
B<Note:> If you do choose to use a custom digest algorithm, you must set it
every time you access this file. Otherwise, the default (MD5) will be used.
=head1 PERFORMANCE
Because DBM::Deep is a concurrent datastore, every change is flushed to disk
immediately and every read goes to disk. This means that DBM::Deep functions
at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally
50-70ns), or at least 150-200x slower than the comparable in-memory
datastructure in Perl.
There are several techniques you can use to speed up how DBM::Deep functions.
=over 4
=item * Put it on a ramdisk
The easiest and quickest mechanism to making DBM::Deep run faster is to create
a ramdisk and locate the DBM::Deep file there. Doing this as an option may
become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN.
=item * Work at the tightest level possible
It is much faster to assign the level of your db that you are working with to
an intermediate variable than to re-look it up every time. Thus
# BAD
while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) {
...
}
# GOOD
my $x = $db->{foo}{bar}{baz};
while ( my ($k, $v) = each %$x ) {
...
}
=item * Make your file as tight as possible
If you know that you are not going to use more than 65K in your database,
consider using the C<pack_size =E<gt> 'small'> option. This will instruct
DBM::Deep to use 16bit addresses, meaning that the seek times will be less.
=back
=head1 SEE ALSO
L<DBM::Deep(3)>, L<Digest::MD5(3)>, L<Digest::SHA256(3)>,
L<Crypt::Blowfish(3)>, L<Compress::Zlib(3)>
=cut

View File

@@ -0,0 +1,442 @@
package DBM::Deep::Engine;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
no warnings 'recursion';
use DBM::Deep::Iterator ();
# File-wide notes:
# * Every method in here assumes that the storage has been appropriately
# safeguarded. This can be anything from flock() to some sort of manual
# mutex. But, it's the caller's responsibility to make sure that this has
# been done.
# Single-character type signatures for the two reference kinds.
sub SIG_HASH () { 'H' }
sub SIG_ARRAY () { 'A' }
=head1 NAME
DBM::Deep::Engine - mediate mapping between DBM::Deep objects and storage medium
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It mediates the low-level
mapping between the L<DBM::Deep> objects and the storage medium.
The purpose of this documentation is to provide low-level documentation for
developers. It is B<not> intended to be used by the general public. This
documentation and what it documents can and will change without notice.
=head1 OVERVIEW
The engine exposes an API to the DBM::Deep objects (DBM::Deep, DBM::Deep::Array,
and DBM::Deep::Hash) for their use to access the actual stored values. This API
is the following:
=over 4
=item * new
=item * read_value
=item * get_classname
=item * make_reference
=item * key_exists
=item * delete_key
=item * write_value
=item * get_next_key
=item * setup
=item * clear
=item * begin_work
=item * commit
=item * rollback
=item * lock_exclusive
=item * lock_shared
=item * unlock
=back
They are explained in their own sections below. These methods, in turn, may
provide some bounds-checking, but primarily act to instantiate objects in the
Engine::Sector::* hierarchy and dispatch to them.
=head1 TRANSACTIONS
Transactions in DBM::Deep are implemented using a variant of MVCC. This attempts
to keep the amount of actual work done against the file low while still providing
Atomicity, Consistency, and Isolation. Durability, unfortunately, cannot be done
with only one file.
=head2 STALENESS
If another process uses a transaction slot and writes stuff to it, then
terminates, the data that process wrote is still within the file. In order to
address this, there is also a transaction staleness counter associated within
every write. Each time a transaction is started, that process increments that
transaction's staleness counter. If, when it reads a value, the staleness
counters aren't identical, DBM::Deep will consider the value on disk to be stale
and discard it.
=head2 DURABILITY
The fourth leg of ACID is Durability, the guarantee that when a commit returns,
the data will be there the next time you read from it. This should be regardless
of any crashes or powerdowns in between the commit and subsequent read.
DBM::Deep does provide that guarantee; once the commit returns, all of the data
has been transferred from the transaction shadow to the HEAD. The issue arises
with partial commits - a commit that is interrupted in some fashion. In keeping
with DBM::Deep's "tradition" of very light error-checking and non-existent
error-handling, there is no way to recover from a partial commit. (This is
probably a failure in Consistency as well as Durability.)
Other DBMSes use transaction logs (a separate file, generally) to achieve
Durability. As DBM::Deep is a single-file, we would have to do something
similar to what SQLite and BDB do in terms of committing using synchronized
writes. To do this, we would have to use a much higher RAM footprint and some
serious programming that makes my head hurt just to think about it.
=cut
=head1 METHODS
=head2 read_value( $obj, $key )
This takes an object that provides _base_offset() and a string. It returns the
value stored in the corresponding Sector::Value's data section.
=cut
sub read_value { die "read_value must be implemented in a child class" }
=head2 get_classname( $obj )
This takes an object that provides _base_offset() and returns the classname (if
any) associated with it.
It delegates to Sector::Reference::get_classname() for the heavy lifting.
It performs a staleness check.
=cut
sub get_classname { die "get_classname must be implemented in a child class" }
=head2 make_reference( $obj, $old_key, $new_key )
This takes an object that provides _base_offset() and two strings. The
strings correspond to the old key and new key, respectively. This operation
is equivalent to (given C<< $db->{foo} = []; >>) C<< $db->{bar} = $db->{foo} >>.
This returns nothing.
=cut
sub make_reference { die "make_reference must be implemented in a child class" }
=head2 key_exists( $obj, $key )
This takes an object that provides _base_offset() and a string for
the key to be checked. This returns 1 for true and "" for false.
=cut
sub key_exists { die "key_exists must be implemented in a child class" }
=head2 delete_key( $obj, $key )
This takes an object that provides _base_offset() and a string for
the key to be deleted. This returns the result of the Sector::Reference
delete_key() method.
=cut
sub delete_key { die "delete_key must be implemented in a child class" }
=head2 write_value( $obj, $key, $value )
This takes an object that provides _base_offset(), a string for the
key, and a value. This value can be anything storable within L<DBM::Deep>.
This returns 1 upon success.
=cut
sub write_value { die "write_value must be implemented in a child class" }
=head2 setup( $obj )
This takes an object that provides _base_offset(). It will do everything needed
in order to properly initialize all values for necessary functioning. If this is
called upon an already initialized object, this will also reset the inode.
This returns 1.
=cut
sub setup { die "setup must be implemented in a child class" }
=head2 begin_work( $obj )
This takes an object that provides _base_offset(). It will set up all necessary
bookkeeping in order to run all work within a transaction.
If $obj is already within a transaction, an error will be thrown. If there are
no more available transactions, an error will be thrown.
This returns undef.
=cut
sub begin_work { die "begin_work must be implemented in a child class" }
=head2 rollback( $obj )
This takes an object that provides _base_offset(). It will revert all
actions taken within the running transaction.
If $obj is not within a transaction, an error will be thrown.
This returns 1.
=cut
sub rollback { die "rollback must be implemented in a child class" }
=head2 commit( $obj )
This takes an object that provides _base_offset(). It will apply all
actions taken within the transaction to the HEAD.
If $obj is not within a transaction, an error will be thrown.
This returns 1.
=cut
sub commit { die "commit must be implemented in a child class" }
=head2 get_next_key( $obj, $prev_key )
This takes an object that provides _base_offset() and an optional string
representing the prior key returned via a prior invocation of this method.
This method delegates to C<< DBM::Deep::Iterator->get_next_key() >>.
=cut
# XXX Add staleness here
sub get_next_key {
my $self = shift;
my ($obj, $prev_key) = @_;
# XXX Need to add logic about resetting the iterator if any key in the
# reference has changed
unless ( defined $prev_key ) {
eval "use " . $self->iterator_class; die $@ if $@;
$obj->{iterator} = $self->iterator_class->new({
base_offset => $obj->_base_offset,
engine => $self,
});
}
return $obj->{iterator}->get_next_key( $obj );
}
=head2 lock_exclusive()
This takes an object that provides _base_offset(). It will guarantee that
the storage has taken precautions to be safe for a write.
This returns nothing.
=cut
sub lock_exclusive {
    # Delegate to the storage layer: take a write-safe lock for $obj.
    my ($self, $obj) = @_;
    return $self->storage->lock_exclusive( $obj );
}
=head2 lock_shared()
This takes an object that provides _base_offset(). It will guarantee that
the storage has taken precautions to be safe for a read.
This returns nothing.
=cut
sub lock_shared {
    # Delegate to the storage layer: take a read-safe lock for $obj.
    my ($self, $obj) = @_;
    return $self->storage->lock_shared( $obj );
}
=head2 unlock()
This takes an object that provides _base_offset(). It will guarantee that
the storage has released the most recently-taken lock.
This returns nothing.
=cut
sub unlock {
    # Release the most recently taken storage lock; when the release
    # actually happened (true result), flush pending writes to disk.
    my ($self, $obj) = @_;

    my $released = $self->storage->unlock( $obj );
    $self->flush if $released;

    return $released;
}
=head1 INTERNAL METHODS
The following methods are internal-use-only to DBM::Deep::Engine and its
child classes.
=cut
=head2 flush()
This takes no arguments. It will do everything necessary to flush all things to
disk. This is usually called during unlock() and setup().
This returns nothing.
=cut
# Push any buffered writes down to the storage layer.  Called from
# unlock() after a successful release, and during setup().  Returns
# nothing.
sub flush {
    my $self = shift;

    # Why do we need to have the storage flush? Shouldn't autoflush take care of
    # things? -RobK, 2008-06-26
    $self->storage->flush;

    return;
}
=head2 load_sector( $loc )
This takes an id/location/offset and loads the sector based on the engine's
defined sector type.
=cut
sub load_sector { $_[0]->sector_type->load( @_ ) }
=head2 clear( $obj )
This takes an object that provides _base_offset() and deletes all its
elements, returning nothing.
=cut
sub clear { die "clear must be implemented in a child class" }
=head2 cache / clear_cache
This is the cache of loaded Reference sectors.
=cut
# Lazily-created per-engine cache of loaded Reference sectors.
sub cache {
    my $self = shift;
    $self->{cache} = {} unless $self->{cache};
    return $self->{cache};
}

# Empty the cache in place (the hashref itself is retained).
sub clear_cache {
    my $self = shift;
    %{ $self->cache } = ();
}
=head2 supports( $option )
This returns a boolean indicating whether this instance of DBM::Deep supports
that feature. C<$option> can be one of:
=over 4
=item * transactions
=item * singletons
=back
Any other value will return false.
=cut
sub supports { die "supports must be implemented in a child class" }
=head1 ACCESSORS
The following are readonly attributes.
=over 4
=item * storage
=item * sector_type
=item * iterator_class
=back
=cut
# Read-only accessor for the storage backend object.
sub storage { $_[0]{storage} }

# Abstract accessors: each engine subclass names its sector and iterator
# classes (see DBM::Deep::Engine::DBI for an example).
sub sector_type { die "sector_type must be implemented in a child class" }
sub iterator_class { die "iterator_class must be implemented in a child class" }
# This code is to make sure we write all the values in the $value to the
# disk and to make sure all changes to $value after the assignment are
# reflected on disk. This may be counter-intuitive at first, but it is
# correct dwimmery.
# NOTE - simply tying $value won't perform a STORE on each value. Hence,
# the copy to a temp value.
#
# _descend( $value, $value_sector )
# Ties the aggregate in $value to its on-disk sector, then copies the
# saved contents back through the tie so every element is STOREd.
# Non-aggregate values (neither ARRAY nor HASH reftype) are left alone.
# Returns nothing.
sub _descend {
    my $self = shift;
    my ($value, $value_sector) = @_;
    my $r = Scalar::Util::reftype( $value ) || '';

    if ( $r eq 'ARRAY' ) {
        # Copy out, tie, copy back in -- each assignment now hits disk.
        my @temp = @$value;
        tie @$value, 'DBM::Deep', {
            base_offset => $value_sector->offset,
            staleness => $value_sector->staleness,
            storage => $self->storage,
            engine => $self,
        };
        @$value = @temp;
        # Only bless if the caller hasn't already blessed it into a class.
        bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
    }
    elsif ( $r eq 'HASH' ) {
        my %temp = %$value;
        tie %$value, 'DBM::Deep', {
            base_offset => $value_sector->offset,
            staleness => $value_sector->staleness,
            storage => $self->storage,
            engine => $self,
        };
        %$value = %temp;
        bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
    }

    return;
}
1;
__END__

View File

@@ -0,0 +1,367 @@
package DBM::Deep::Engine::DBI;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
no warnings 'recursion';
use base 'DBM::Deep::Engine';
use DBM::Deep::Sector::DBI ();
use DBM::Deep::Storage::DBI ();
# Sector implementation used by the DBI engine.
sub sector_type { return 'DBM::Deep::Sector::DBI' }

# Iterator implementation used by the DBI engine.
sub iterator_class { return 'DBM::Deep::Iterator::DBI' }
# Construct a DBI engine. Accepts a hashref of args; a storage layer is
# built from the args unless the caller supplied one. Only the parameters
# this engine knows about (storage, external_refs) are copied over.
sub new {
    my ($class, $args) = @_;

    $args->{storage} = DBM::Deep::Storage::DBI->new( $args )
        unless exists $args->{storage};

    my $self = bless {
        storage       => undef,
        external_refs => undef,
    }, $class;

    for my $param ( keys %$self ) {
        $self->{$param} = $args->{$param} if exists $args->{$param};
    }

    return $self;
}
# Per-object setup for the DBI engine: make sure a row exists for this
# object in the refs table and build a Reference sector over it. The
# sector is the value of the final expression and thus the implicit
# return value; nothing else in this sub uses it.
sub setup {
    my $self = shift;
    my ($obj) = @_;

    # Default the id to 1. This means that we will be creating a row if
    # there isn't one. The assumption is that the row with id=1 can never
    # be deleted. I don't know if this is a good assumption.
    $obj->{base_offset} ||= 1;

    my ($rows) = $self->storage->read_from(
        refs => $obj->_base_offset,
        qw( ref_type ),
    );

    # We don't have a row yet - create one recording this object's type.
    unless ( @$rows ) {
        $self->storage->write_to(
            refs => $obj->_base_offset,
            ref_type => $obj->_type,
        );
    }

    my $sector = DBM::Deep::Sector::DBI::Reference->new({
        engine => $self,
        offset => $obj->_base_offset,
    });
}
# Fetch the value stored under $key in the reference $obj points at.
# Returns undef when the object's sector is gone or the key is absent.
sub read_value {
    my ($self, $obj, $key) = @_;

    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or return;

    my $value_sector = $sector->get_data_for({
        key        => $key,
        allow_head => 1,
    });

    return undef unless $value_sector;

    return $value_sector->data;
}
# Return the class $obj's data was blessed into, if any.
sub get_classname {
    my ($self, $obj) = @_;

    # No sector means no object, hence no class.
    my $sector = $self->load_sector( $obj->_base_offset, 'refs' );
    return unless $sector;

    return $sector->get_classname;
}
# Make $new_key refer to the same value as $old_key within $obj.
# If $old_key has no value yet, an undef scalar sector is created and
# stored under it first. Reference-typed values are shared (same sector,
# refcount bumped); scalar values are cloned so the two keys stay
# independent.
sub make_reference {
    my $self = shift;
    my ($obj, $old_key, $new_key) = @_;

    # Nothing to alias if the object's reference sector is gone.
    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or return;

    my $value_sector = $sector->get_data_for({
        key => $old_key,
        allow_head => 1,
    });

    unless ( $value_sector ) {
        # No existing value: store an explicit undef so both keys exist.
        $value_sector = DBM::Deep::Sector::DBI::Scalar->new({
            engine => $self,
            data => undef,
        });

        $sector->write_data({
            key => $old_key,
            value => $value_sector,
        });
    }

    if ( $value_sector->isa( 'DBM::Deep::Sector::DBI::Reference' ) ) {
        # Aliasing a reference: share the sector and bump its refcount.
        $sector->write_data({
            key => $new_key,
            value => $value_sector,
        });
        $value_sector->increment_refcount;
    }
    else {
        # Aliasing a scalar: store an independent copy.
        $sector->write_data({
            key => $new_key,
            value => $value_sector->clone,
        });
    }

    return;
}
# Tied-hash EXISTS support: returns 1 when the key is present, '' when it
# is not (exists() wants a defined false, not undef).
sub key_exists {
    my ($self, $obj, $key) = @_;

    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or return '';

    my $found = $sector->get_data_for({
        key        => $key,
        allow_head => 1,
    });

    return $found ? 1 : '';
}
# Remove $key from the reference $obj points at.
# Delegates to the sector; the sector's return value (the old value) is
# passed straight back to the caller.
sub delete_key {
    my ($self, $obj, $key) = @_;

    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or return '';

    return $sector->delete_key({
        key        => $key,
        allow_head => 0,
    });
}
# Store $value under $key in the reference $obj points at.
# Plain scalars and unblessed HASH/ARRAY refs are supported; any other
# reftype is rejected. An already-tied DBM::Deep aggregate from the same
# storage is shared by sector; a plain aggregate gets a fresh Reference
# sector and its contents are written out via _descend. Returns 1.
sub write_value {
    my $self = shift;
    my ($obj, $key, $value) = @_;

    my $r = Scalar::Util::reftype( $value ) || '';
    {
        # Bare block as a multi-way guard: fall through for supported
        # reference types, throw for everything else.
        last if $r eq '';
        last if $r eq 'HASH';
        last if $r eq 'ARRAY';

        DBM::Deep->_throw_error(
            "Storage of references of type '$r' is not supported."
        );
    }

    # Load the reference entry
    # Determine if the row was deleted under us
    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or die "Cannot load sector at '@{[$obj->_base_offset]}'\n";;

    my ($type, $class);
    if (
        $r eq 'ARRAY' || $r eq 'HASH' and ref $value ne 'DBM::Deep::Null'
    ) {
        my $tmpvar;
        if ( $r eq 'ARRAY' ) {
            $tmpvar = tied @$value;
        } elsif ( $r eq 'HASH' ) {
            $tmpvar = tied %$value;
        }

        if ( $tmpvar ) {
            # Only a DBM::Deep tie over the very same storage may be shared.
            my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };

            unless ( $is_dbm_deep ) {
                DBM::Deep->_throw_error( "Cannot store something that is tied." );
            }

            unless ( $tmpvar->_engine->storage == $self->storage ) {
                DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
            }

            # Load $tmpvar's sector and write it as data in this
            # reference (keyed by $key), sharing it between both homes.
            my $value_sector = $self->load_sector( $tmpvar->_base_offset, 'refs' );
            $sector->write_data({
                key => $key,
                value => $value_sector,
            });
            $value_sector->increment_refcount;

            return 1;
        }

        # Untied aggregate: it becomes a new Reference sector whose type
        # is the first letter of the reftype ('A' or 'H').
        $type = substr( $r, 0, 1 );
        $class = 'DBM::Deep::Sector::DBI::Reference';
    }
    else {
        # Scalars and stale Null references end up here.
        if ( tied($value) ) {
            DBM::Deep->_throw_error( "Cannot store something that is tied." );
        }

        if ( ref $value eq 'DBM::Deep::Null' ) {
            # A stale reference degrades to undef, with a warning.
            DBM::Deep::_warnif(
                'uninitialized', 'Assignment of stale reference'
            );
            $value = undef;
        }

        $class = 'DBM::Deep::Sector::DBI::Scalar';
        $type = 'S';
    }

    # Create this after loading the reference sector in case something bad
    # happens. This way, we won't allocate value sector(s) needlessly.
    my $value_sector = $class->new({
        engine => $self,
        data => $value,
        type => $type,
    });

    $sector->write_data({
        key => $key,
        value => $value_sector,
    });

    # Recurse into the aggregate so its elements land on disk too.
    $self->_descend( $value, $value_sector );

    return 1;
}
#sub begin_work {
# my $self = shift;
# die "Transactions are not supported by this engine"
# unless $self->supports('transactions');
#
# if ( $self->in_txn ) {
# DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
# }
#
# $self->storage->begin_work;
#
# $self->in_txn( 1 );
#
# return 1;
#}
#
#sub rollback {
# my $self = shift;
# die "Transactions are not supported by this engine"
# unless $self->supports('transactions');
#
# if ( !$self->in_txn ) {
# DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
# }
#
# $self->storage->rollback;
#
# $self->in_txn( 0 );
#
# return 1;
#}
#
#sub commit {
# my $self = shift;
# die "Transactions are not supported by this engine"
# unless $self->supports('transactions');
#
# if ( !$self->in_txn ) {
# DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
# }
#
# $self->storage->commit;
#
# $self->in_txn( 0 );
#
# return 1;
#}
#
#sub in_txn {
# my $self = shift;
# $self->{in_txn} = shift if @_;
# $self->{in_txn};
#}
# Feature probe for the DBI engine: true (1) only for 'singletons';
# 'transactions' and everything else report false.
sub supports {
    my ($self, $feature) = @_;
    return 1 if $feature eq 'singletons';
    return;
}
# File-format version implemented by this engine.
sub db_version { return '1.0020'; }
# Remove every key/element from the reference $obj points at.
sub clear {
    my ($self, $obj) = @_;

    my $sector = $self->load_sector( $obj->_base_offset, 'refs' )
        or return;

    $sector->clear;

    return;
}
1;
__END__

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,136 @@
package DBM::Deep::Hash;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
no warnings 'recursion';
use base 'DBM::Deep';
# Return the object actually backing $_[0]: the tied object when the hash
# is tied, the plain object otherwise, or undef when the hash is no
# longer usable (global destruction).
sub _get_self {
    # See the note in Array.pm as to why this is commented out.
    # eval { local $SIG{'__DIE__'}; tied( %{$_[0]} ) } || $_[0]
    # During global destruction %{$_[0]} might get tied to undef, so we
    # need to check that case if tied returns false. The eval probes
    # whether the hash can still be accessed (with $@ protected via
    # local *@); if not, undef is returned so destructors can bail out.
    tied %{$_[0]} or local *@, eval { exists $_[0]{_}; 1 } ? $_[0] : undef
}
sub _repr { return {} }
# tie() entry point: normalize the arguments, mark the type as hash, and
# hand off to the shared initializer.
sub TIEHASH {
    my $class = shift;
    my $args  = $class->_get_args( @_ );

    $args->{type} = $class->TYPE_HASH;

    return $class->_init($args);
}
# Tied-hash FETCH.
# The key is run through the user's filter_store_key (when set) so the
# lookup matches what STORE wrote; the raw, unfiltered key is also passed
# along to the superclass as an extra argument.
sub FETCH {
    my $self = shift->_get_self;
    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];
    my $key = ($self->_engine->storage->{filter_store_key})
        ? $self->_engine->storage->{filter_store_key}->($_[0])
        : $_[0];

    return $self->SUPER::FETCH( $key, $_[0] );
}
# Tied-hash STORE.
# The key is transformed by filter_store_key (when set) before being
# written; the raw key is passed along to the superclass as well.
sub STORE {
    my $self = shift->_get_self;
    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];
    my $key = ($self->_engine->storage->{filter_store_key})
        ? $self->_engine->storage->{filter_store_key}->($_[0])
        : $_[0];
    my $value = $_[1];

    return $self->SUPER::STORE( $key, $value, $_[0] );
}
# Tied-hash EXISTS: apply the store-key filter, then defer to the parent.
sub EXISTS {
    my $self = shift->_get_self;
    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];

    my $filter = $self->_engine->storage->{filter_store_key};
    my $key    = $filter ? $filter->($_[0]) : $_[0];

    return $self->SUPER::EXISTS( $key );
}
# Tied-hash DELETE: apply the store-key filter and defer to the parent,
# handing it the unfiltered key as an extra argument.
sub DELETE {
    my $self = shift->_get_self;
    DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0];

    my $filter = $self->_engine->storage->{filter_store_key};
    my $key    = $filter ? $filter->($_[0]) : $_[0];

    return $self->SUPER::DELETE( $key, $_[0] );
}
# Locate and return the first key (in no particular order) under a
# shared lock, running it through filter_fetch_key on the way out.
sub FIRSTKEY {
    my $self = shift->_get_self;

    $self->lock_shared;
    my $first = $self->_engine->get_next_key( $self );
    $self->unlock;

    my $filter = $self->_engine->storage->{filter_fetch_key};
    return ($first && $filter) ? $filter->($first) : $first;
}
# Return the key after $_[0] (in no particular order). The previous key
# is re-filtered through filter_store_key so it matches what the engine
# stored; the result is filtered back through filter_fetch_key.
sub NEXTKEY {
    my $self = shift->_get_self;

    my $store_filter = $self->_engine->storage->{filter_store_key};
    my $prev_key = $store_filter ? $store_filter->($_[0]) : $_[0];

    $self->lock_shared;
    my $next = $self->_engine->get_next_key( $self, $prev_key );
    $self->unlock;

    my $fetch_filter = $self->_engine->storage->{filter_fetch_key};
    return ($next && $fetch_filter) ? $fetch_filter->($next) : $next;
}
# Public aliases for the tied-hash iteration hooks.
sub first_key { my $self = shift; return $self->FIRSTKEY(@_); }
sub next_key  { my $self = shift; return $self->NEXTKEY(@_);  }
# Remove every key from the hash.
# NOTE(review): this deletes while iterating with first_key/next_key and
# re-primes the outer loop with first_key after each inner sweep -
# presumably to cope with iterator invalidation as keys disappear;
# confirm before simplifying.
sub _clear {
    my $self = shift;

    while ( defined(my $key = $self->first_key) ) {
        do {
            # The engine receives the key twice; here both are the same
            # (filtered) value.
            $self->_engine->delete_key( $self, $key, $key );
        } while defined($key = $self->next_key($key));
    }

    return;
}
# Copy every key/value of this hash into $db_temp, deep-copying each
# value via _copy_value. Always returns 1.
sub _copy_node {
    my $self = shift;
    my ($db_temp) = @_;

    my $key = $self->first_key();
    while (defined $key) {
        my $value = $self->get($key);
        # _copy_value gets a reference to the destination slot so it can
        # recurse into nested structures.
        $self->_copy_value( \$db_temp->{$key}, $value );
        $key = $self->next_key($key);
    }

    return 1;
}
1;
__END__

View File

@@ -0,0 +1,347 @@
=head1 NAME
DBM::Deep::Internals - Out of date documentation on DBM::Deep internals
=head1 OUT OF DATE
This document is out-of-date. It describes an intermediate file format used
during the development from 0.983 to 1.0000. It will be rewritten soon.
So far, the description of the header format has been updated.
=head1 DESCRIPTION
This is a document describing the internal workings of L<DBM::Deep>. It is
not necessary to read this document if you only intend to be a user. This
document is intended for people who either want a deeper understanding of
specifics of how L<DBM::Deep> works or who wish to help program
L<DBM::Deep>.
=head1 CLASS LAYOUT
L<DBM::Deep> is broken up into five classes in three inheritance hierarchies.
=over 4
=item *
L<DBM::Deep> is the parent of L<DBM::Deep::Array> and L<DBM::Deep::Hash>.
These classes form the immediate interface to the outside world. They are the
classes that provide the TIE mechanisms as well as the OO methods.
=item *
L<DBM::Deep::Engine> is the layer that deals with the mechanics of reading
and writing to the file. This is where the logic of the file layout is
handled.
=item *
L<DBM::Deep::File> is the layer that deals with the physical file. As a
singleton that every other object has a reference to, it also provides a place
to handle datastructure-wide items, such as transactions.
=back
=head1 FILE LAYOUT
This describes the 1.0003 and 2.0000 formats, which internally are numbered
3 and 4, respectively. The internal numbers are used in this section. These
two formats are almost identical.
DBM::Deep uses a tagged file layout. Every section has a tag, a size, and then
the data.
=head2 File header
The file header consists of two parts. The first part is a fixed length of
13 bytes:
DPDB h VVVV SSSS
\ / | \ \
\/ '---. \ '--- size of the second part of the header
file \ '--- version
signature tag
=over 4
=item * File Signature
The first four bytes are 'DPDB' in network byte order, signifying that this is
a DBM::Deep file.
=item * File tag
A literal ASCII 'h', indicating that this is the header. The file used by
versions prior to 1.00 had a different fifth byte, allowing the difference
to be determined.
=item * Version
This is four bytes containing the file version. This lets the file format change over time.
It is packed in network order, so version 4 is stored as "\0\0\0\cD".
=item * Header size
The size of the second part of the header, in bytes. This number is also
packed in network order.
=back
The second part of the header is as follows:
S B S T T(TTTTTTTTT...) (SS SS SS SS ...) (continued...)
| | | | \ |
| | | '----------. \ staleness counters
| | '--------. \ txn bitfield
| '------. \ number of transactions
byte size \ data sector size
max buckets
(continuation...)
BB(BBBBBB) DD(DDDDDD) II(IIIIII)
| | |
| free data |
free blist free index
=over
=item * Constants
These are the file-wide constants that determine how the file is laid out.
They can only be set upon file creation.
The byte size is the number of bytes used to point to an offset elsewhere
in the file. This corresponds to the C<pack_size> option. This and the
next three values are stored as packed 8-bit integers (chars), so 2 is
represented by "\cB".
C<max_buckets> and C<data_sector_size> are documented in the main
L<DBM::Deep> man page. The number stored is actually one less than what is
passed to the constructor, to allow for a range of 1-256.
The number of transactions corresponds to the C<num_txns> value passed to
the constructor.
=item * Transaction information
The transaction bitfield consists of one bit for every available
transaction ID. It is therefore anywhere from 1 byte to 32 bytes long.
The staleness counters each take two bytes (packed 16-bit integers), one
for each transaction, not including the so-called HEAD (the main
transaction that all processes share I<before> calling C<begin_work>). So
these take up 0 to 508 bytes.
Staleness is explained in L<DBM::Deep::Engine|DBM::Deep::Engine/STALENESS>.
=item * Freespace information
Pointers into the first free sectors of the various sector sizes (Index,
Bucketlist, and Data) are stored here. These are called chains internally,
as each free sector points to the next one.
The number of bytes is determined by the byte size, ranging from 2 to 8.
=back
=head2 Index
The Index parts can be tagged either as Hash, Array, or Index. The latter
is if there was a reindexing due to a bucketlist growing too large. The others
are the root index for their respective datatypes. The index consists of a
tag, a size, and then 256 sections containing file locations. Each section
corresponds to each value representable in a byte.
The index is used as follows - whenever a hashed key is being looked up, the
first byte is used to determine which location to go to from the root index.
Then, if that's also an index, the second byte is used, and so forth until a
bucketlist is found.
=head2 Bucketlist
This is the part that contains the link to the data section. A bucketlist
defaults to being 16 buckets long (modifiable by the I<max_buckets>
parameter used when creating a new file). Each bucket contains an MD5 and a
location of the appropriate key section.
=head2 Key area
This is the part that handles transactional awareness. There are
I<max_buckets> sections. Each section contains the location to the data
section, a transaction ID, and whether that transaction considers this key to
be deleted or not.
=head2 Data area
This is the part that actually stores the key, value, and class (if
appropriate). The layout is:
=over 4
=item * tag
=item * length of the value
=item * the actual value
=item * keylength
=item * the actual key
=item * a byte indicating if this value has a classname
=item * the classname (if one is there)
=back
The key is stored after the value because the value is requested more often
than the key.
=head1 PERFORMANCE
L<DBM::Deep> is written completely in Perl. It also is a multi-process DBM
that uses the datafile as a method of synchronizing between multiple
processes. This is unlike most RDBMSes like MySQL and Oracle. Furthermore,
unlike all RDBMSes, L<DBM::Deep> stores both the data and the structure of
that data as it would appear in a Perl program.
=head2 CPU
DBM::Deep attempts to be CPU-light. As it stores all the data on disk,
DBM::Deep is I/O-bound, not CPU-bound.
=head2 RAM
DBM::Deep uses extremely little RAM relative to the amount of data you can
access. You can iterate through a million keys (using C<each()>) without
increasing your memory usage at all.
=head2 DISK
DBM::Deep is I/O-bound, pure and simple. The faster your disk, the faster
DBM::Deep will be. Currently, when performing C<my $x = $db-E<gt>{foo}>, there
are a minimum of 4 seeks and 1332 + N bytes read (where N is the length of your
data). (All values assume a medium filesize.) The actions taken are:
=over 4
=item 1 Lock the file
=item 1 Perform a stat() to determine if the inode has changed
=item 1 Go to the primary index for the $db (1 seek)
=item 1 Read the tag/size of the primary index (5 bytes)
=item 1 Read the body of the primary index (1024 bytes)
=item 1 Go to the bucketlist for this MD5 (1 seek)
=item 1 Read the tag/size of the bucketlist (5 bytes)
=item 1 Read the body of the bucketlist (144 bytes)
=item 1 Go to the keys location for this MD5 (1 seek)
=item 1 Read the tag/size of the keys section (5 bytes)
=item 1 Read the body of the keys location (144 bytes)
=item 1 Go to the data section that corresponds to this transaction ID. (1 seek)
=item 1 Read the tag/size of the data section (5 bytes)
=item 1 Read the value for this data (N bytes)
=item 1 Unlock the file
=back
Every additional level of indexing (if there are enough keys) requires an
additional seek and the reading of 1029 additional bytes. If the value is
blessed, an additional 1 seek and 9 + M bytes are read (where M is the length
of the classname).
Arrays are (currently) even worse because they're considered "funny hashes"
with the length stored as just another key. This means that if you do any sort
of lookup with a negative index, this entire process is performed twice - once
for the length and once for the value.
=head1 ACTUAL TESTS
=head2 SPEED
Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as
the almighty I<BerkeleyDB>. But it makes up for it in features like true
multi-level hash/array support, and cross-platform FTPable files. Even so,
DBM::Deep is still pretty fast, and the speed stays fairly consistent, even
with huge databases. Here is some test data:
Adding 1,000,000 keys to new DB file...
At 100 keys, avg. speed is 2,703 keys/sec
At 200 keys, avg. speed is 2,642 keys/sec
At 300 keys, avg. speed is 2,598 keys/sec
At 400 keys, avg. speed is 2,578 keys/sec
At 500 keys, avg. speed is 2,722 keys/sec
At 600 keys, avg. speed is 2,628 keys/sec
At 700 keys, avg. speed is 2,700 keys/sec
At 800 keys, avg. speed is 2,607 keys/sec
At 900 keys, avg. speed is 2,190 keys/sec
At 1,000 keys, avg. speed is 2,570 keys/sec
At 2,000 keys, avg. speed is 2,417 keys/sec
At 3,000 keys, avg. speed is 1,982 keys/sec
At 4,000 keys, avg. speed is 1,568 keys/sec
At 5,000 keys, avg. speed is 1,533 keys/sec
At 6,000 keys, avg. speed is 1,787 keys/sec
At 7,000 keys, avg. speed is 1,977 keys/sec
At 8,000 keys, avg. speed is 2,028 keys/sec
At 9,000 keys, avg. speed is 2,077 keys/sec
At 10,000 keys, avg. speed is 2,031 keys/sec
At 20,000 keys, avg. speed is 1,970 keys/sec
At 30,000 keys, avg. speed is 2,050 keys/sec
At 40,000 keys, avg. speed is 2,073 keys/sec
At 50,000 keys, avg. speed is 1,973 keys/sec
At 60,000 keys, avg. speed is 1,914 keys/sec
At 70,000 keys, avg. speed is 2,091 keys/sec
At 80,000 keys, avg. speed is 2,103 keys/sec
At 90,000 keys, avg. speed is 1,886 keys/sec
At 100,000 keys, avg. speed is 1,970 keys/sec
At 200,000 keys, avg. speed is 2,053 keys/sec
At 300,000 keys, avg. speed is 1,697 keys/sec
At 400,000 keys, avg. speed is 1,838 keys/sec
At 500,000 keys, avg. speed is 1,941 keys/sec
At 600,000 keys, avg. speed is 1,930 keys/sec
At 700,000 keys, avg. speed is 1,735 keys/sec
At 800,000 keys, avg. speed is 1,795 keys/sec
At 900,000 keys, avg. speed is 1,221 keys/sec
At 1,000,000 keys, avg. speed is 1,077 keys/sec
This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & Perl
5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and
values were between 6 - 12 chars in length. The DB file ended up at 210MB.
Run time was 12 min 3 sec.
=head2 MEMORY USAGE
One of the great things about L<DBM::Deep> is that it uses very little memory.
Even with huge databases (1,000,000+ keys) you will not see much increased
memory on your process. L<DBM::Deep> relies solely on the filesystem for storing
and fetching data. Here is output from I<top> before even opening a database
handle:
PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND
22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl
Basically the process is taking 2,716K of memory. And here is the same
process after storing and fetching 1,000,000 keys:
PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND
22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl
Notice the memory usage increased by only 56K. Test was performed on a 700mHz
x86 box running Linux RedHat 7.2 & Perl 5.6.1.
=cut

View File

@@ -0,0 +1,73 @@
package DBM::Deep::Iterator;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
=head1 NAME
DBM::Deep::Iterator - iterator for FIRSTKEY() and NEXTKEY()
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It is the iterator
for FIRSTKEY() and NEXTKEY().
=head1 OVERVIEW
This object is the abstract base class for the iterators that implement
FIRSTKEY() and NEXTKEY(); concrete subclasses supply C<reset> and
C<get_next_key>.
=head1 METHODS
=head2 new(\%params)
The constructor takes a hashref of params. The hashref is assumed to have the
following elements:
=over 4
=item * engine (of type L<DBM::Deep::Engine>)
=item * base_offset (the base_offset of the invoking DBM::Deep object)
=back
=cut
# Build an iterator over the object at $args->{base_offset}.
# The engine reference is weakened so iterators never keep an engine (and
# its file handles) alive on their own.
sub new {
    my $class = shift;
    my ($args) = @_;

    my $self = bless {
        engine      => $args->{engine},
        base_offset => $args->{base_offset},
    }, $class;

    # Bug fix: this package never loaded Scalar::Util itself and relied
    # on some other module having done so. Load it explicitly before
    # calling weaken().
    require Scalar::Util;
    Scalar::Util::weaken( $self->{engine} );

    $self->reset;

    return $self;
}
=head2 reset()
This method takes no arguments.
It will reset the iterator so that it will start from the beginning again.
This method returns nothing.
=cut
sub reset { die "reset must be implemented in a child class" }
=head2 get_next_key( $obj )
=cut
sub get_next_key { die "get_next_key must be implemented in a child class" }
1;
__END__

View File

@@ -0,0 +1,37 @@
package DBM::Deep::Iterator::DBI;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Iterator );
# Discard the in-flight statement handle so the next get_next_key starts
# a fresh scan. Finishing may fail harmlessly (handle already gone).
sub reset {
    my $self = shift;

    if ( my $sth = delete $self->{sth} ) {
        eval { $sth->finish; };
    }

    return;
}
# Return the next key for this iteration, or undef when exhausted.
# A statement handle is prepared lazily on first call and lives until
# reset(); rows come back in the database's random order, which satisfies
# the "no particular order" contract of keys()/each().
sub get_next_key {
    my $self = shift;
    my ($obj) = @_;

    unless ( exists $self->{sth} ) {
        # For mysql, this needs to be RAND()
        # For sqlite, this needs to be random()
        my $storage = $self->{engine}->storage;
        $self->{sth} = $storage->{dbh}->prepare(
            "SELECT `key` FROM datas WHERE ref_id = ? ORDER BY "
            . $storage->rand_function,
        );
        $self->{sth}->execute( $self->{base_offset} );
    }

    # One row per call; fetchrow_array returns empty at end of results.
    my ($key) = $self->{sth}->fetchrow_array;
    return $key;
}
1;
__END__

View File

@@ -0,0 +1,104 @@
package DBM::Deep::Iterator::File;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Iterator );
use DBM::Deep::Iterator::File::BucketList ();
use DBM::Deep::Iterator::File::Index ();
sub reset { $_[0]{breadcrumbs} = []; return }
# Build the iterator mediator matching the sector found at $loc:
# an Index mediator for index sectors, a BucketList mediator for
# bucketlist sectors. Returns nothing if no sector exists there.
sub get_sector_iterator {
    my ($self, $loc) = @_;

    my $sector = $self->{engine}->load_sector( $loc )
        or return;

    if ( $sector->isa( 'DBM::Deep::Sector::File::Index' ) ) {
        return DBM::Deep::Iterator::File::Index->new({
            iterator => $self,
            sector   => $sector,
        });
    }

    if ( $sector->isa( 'DBM::Deep::Sector::File::BucketList' ) ) {
        return DBM::Deep::Iterator::File::BucketList->new({
            iterator => $self,
            sector   => $sector,
        });
    }

    DBM::Deep->_throw_error( "get_sector_iterator(): Why did $loc make a $sector?" );
}
# Return the next key for $obj, or nothing when iteration is exhausted.
# Walks the on-disk index tree with an explicit stack of sector
# iterators ("breadcrumbs"): Index iterators push child iterators,
# BucketList iterators yield keys. The stack is primed from the object's
# Reference sector on the first call.
sub get_next_key {
    my $self = shift;
    my ($obj) = @_;

    my $crumbs = $self->{breadcrumbs};
    my $e = $self->{engine};

    unless ( @$crumbs ) {
        # This will be a Reference sector
        my $sector = $e->load_sector( $self->{base_offset} )
            # If no sector is found, this must have been deleted from under us.
            or return;

        # A staleness mismatch means the object was deleted and recreated.
        if ( $sector->staleness != $obj->_staleness ) {
            return;
        }

        my $loc = $sector->get_blist_loc
            or return;

        push @$crumbs, $self->get_sector_iterator( $loc );
    }

    FIND_NEXT_KEY: {
        # We're at the end.
        unless ( @$crumbs ) {
            $self->reset;
            return;
        }

        my $iterator = $crumbs->[-1];

        # This level is done.
        if ( $iterator->at_end ) {
            pop @$crumbs;
            redo FIND_NEXT_KEY;
        }

        if ( $iterator->isa( 'DBM::Deep::Iterator::File::Index' ) ) {
            # If we don't have any more, it will be caught at the
            # prior check.
            if ( my $next = $iterator->get_next_iterator ) {
                push @$crumbs, $next;
            }
            redo FIND_NEXT_KEY;
        }

        unless ( $iterator->isa( 'DBM::Deep::Iterator::File::BucketList' ) ) {
            DBM::Deep->_throw_error(
                "Should have a bucketlist iterator here - instead have $iterator"
            );
        }

        # At this point, we have a BucketList iterator
        my $key = $iterator->get_next_key;
        if ( defined $key ) {
            return $key;
        }
        #XXX else { $iterator->set_to_end() } ?

        # We hit the end of the bucketlist iterator, so redo
        redo FIND_NEXT_KEY;
    }

    DBM::Deep->_throw_error( "get_next_key(): How did we get here?" );
}
1;
__END__

View File

@@ -0,0 +1,90 @@
package DBM::Deep::Iterator::File::BucketList;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
=head1 NAME
DBM::Deep::Iterator::File::BucketList - mediate between DBM::Deep::Iterator and DBM::Deep::Engine::Sector::BucketList
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It acts as the mediator
between the L<DBM::Deep::Iterator> object and a L<DBM::Deep::Engine::Sector::BucketList>
sector.
=head1 OVERVIEW
This object, despite the implied class hierarchy, does B<NOT> inherit from
L<DBM::Deep::Iterator>. Instead, it delegates to it, essentially acting as a
facade over it. L<DBM::Deep::Iterator/get_next_key> will instantiate one of
these objects as needed to handle an BucketList sector.
=head1 METHODS
=head2 new(\%params)
The constructor takes a hashref of params and blesses it into the invoking class. The
hashref is assumed to have the following elements:
=over 4
=item * iterator (of type L<DBM::Deep::Iterator>)
=item * sector (of type L<DBM::Deep::Engine::Sector::BucketList>)
=back
=cut
# Bless the parameter hashref and start iterating from the first bucket.
sub new {
    my ($class, $args) = @_;
    my $self = bless $args, $class;
    $self->{curr_index} = 0;
    return $self;
}
=head2 at_end()
This takes no arguments.
This returns true/false indicating whether this sector has any more elements that can be
iterated over.
=cut
# True once every bucket slot in this bucketlist has been visited.
sub at_end {
    my $self = shift;
    my $max = $self->{iterator}{engine}->max_buckets;
    return $self->{curr_index} >= $max;
}
=head2 get_next_key()
This takes no arguments.
This returns the next key pointed to by this bucketlist. This value is suitable for
returning by FIRSTKEY or NEXTKEY().
If the bucketlist is exhausted, it returns nothing.
=cut
# Return the key stored in the next bucket, or nothing when the bucket
# has no data (empty/deleted slot) or the bucketlist is exhausted.
sub get_next_key {
    my $self = shift;
    return if $self->at_end;

    my $idx = $self->{curr_index}++;

    # A bucket with no data location is an empty slot; stop there.
    my $data_loc = $self->{sector}->get_data_location_for({
        allow_head => 1,
        idx => $idx,
    }) or return;

    #XXX Do we want to add corruption checks here?
    return $self->{sector}->get_key_for( $idx )->data;
}
1;
__END__

View File

@@ -0,0 +1,86 @@
package DBM::Deep::Iterator::File::Index;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
=head1 NAME
DBM::Deep::Iterator::File::Index - mediate between DBM::Deep::Iterator and DBM::Deep::Engine::Sector::Index
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It acts as the mediator
between the L<DBM::Deep::Iterator> object and a L<DBM::Deep::Engine::Sector::Index>
sector.
=head1 OVERVIEW
This object, despite the implied class hierarchy, does B<NOT> inherit from
L<DBM::Deep::Iterator>. Instead, it delegates to it, essentially acting as a
facade over it. L<DBM::Deep::Iterator/get_next_key> will instantiate one of
these objects as needed to handle an Index sector.
=head1 METHODS
=head2 new(\%params)
The constructor takes a hashref of params and blesses it into the invoking class. The
hashref is assumed to have the following elements:
=over 4
=item * iterator (of type L<DBM::Deep::Iterator>)
=item * sector (of type L<DBM::Deep::Engine::Sector::Index>)
=back
=cut
# Bless the parameter hashref and start iterating from the first entry.
sub new {
    my ($class, $args) = @_;
    my $self = bless $args, $class;
    $self->{curr_index} = 0;
    return $self;
}
=head2 at_end()
This takes no arguments.
This returns true/false indicating whether this sector has any more elements that can be
iterated over.
=cut
# True once every slot of this index sector has been visited.
sub at_end {
    my $self = shift;
    my $max = $self->{iterator}{engine}->hash_chars;
    return $self->{curr_index} >= $max;
}
=head2 get_next_iterator()
This takes no arguments.
This returns an iterator (built by L<DBM::Deep::Iterator/get_sector_iterator>) based
on the sector pointed to by the next occupied location in this index.
If the sector is exhausted, it returns nothing.
=cut
# Scan forward for the next occupied slot in this index sector and hand
# back an iterator over the sector it points to. Returns nothing once
# the index is exhausted.
sub get_next_iterator {
    my $self = shift;

    my $loc;
    until ( $loc ) {
        return if $self->at_end;
        $loc = $self->{sector}->get_entry( $self->{curr_index}++ );
    }

    return $self->{iterator}->get_sector_iterator( $loc );
}
1;
__END__

View File

@@ -0,0 +1,49 @@
package DBM::Deep::Null;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
=head1 NAME
DBM::Deep::Null - NULL object
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It acts as a NULL object
in the same vein as MARCEL's L<Class::Null>. I couldn't use L<Class::Null>
because DBM::Deep needed an object that always evaluated as undef, not an
implementation of the Null Class pattern.
=head1 OVERVIEW
It is used to represent null sectors in DBM::Deep.
=cut
# Overload the common operations so a Null behaves like undef everywhere:
# false in boolean context, "" when stringified (yields undef, which
# warns like undef would), 0 numerically, and croaks when dereferenced.
use overload
    'bool' => sub { undef },
    '""' => sub { undef },
    '0+' => sub { 0 },
    # The slice builds ('cmp' => sub, '<=>' => sub) out of a three-element
    # list: indices 0 and 1 are the operator names, index 2 is the shared
    # comparison sub (a Null compares equal to undef/'' and less than
    # anything else, honoring operand order via $_[2]).
    ('cmp' =>
     '<=>' => sub {
      return 0 if !defined $_[1] || !length $_[1];
      return $_[2] ? 1 : -1;
     }
    )[0,2,1,2], # same sub for both ops
    '%{}' => sub {
        require Carp;
        Carp::croak("Can't use a stale reference as a HASH");
    },
    '@{}' => sub {
        require Carp;
        Carp::croak("Can't use a stale reference as an ARRAY");
    },
    fallback => 1,
    nomethod => 'AUTOLOAD';

# Any method call on a Null (via nomethod above, or directly) returns undef.
sub AUTOLOAD { return; }
1;
__END__

View File

@@ -0,0 +1,37 @@
package DBM::Deep::Sector;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use Scalar::Util ();
# Bless the parameter hashref into the sector class and run subclass
# initialisation. The engine reference is weakened: the engine owns its
# sectors, not the other way around.
sub new {
    my ($class, $args) = @_;
    my $self = bless $args, $class;
    Scalar::Util::weaken( $self->{engine} );
    $self->_init;
    return $self;
}

# Per-sector setup hook; the base class needs none.
sub _init {}
# Produce a sibling sector of the same class: shares the engine, carries
# copies of this sector's type and data.
sub clone {
    my $self = shift;
    my %copy = (
        engine => $self->engine,
        type   => $self->type,
        data   => $self->data,
    );
    return ref($self)->new( \%copy );
}
# Read-only accessors over the sector's backing attributes.
sub engine    { return $_[0]->{engine}    }
sub offset    { return $_[0]->{offset}    }
sub type      { return $_[0]->{type}      }
sub staleness { return $_[0]->{staleness} }

# Loading is format-specific; subclasses must provide it.
sub load { die "load must be implemented in a child class" }
1;
__END__

View File

@@ -0,0 +1,55 @@
package DBM::Deep::Sector::DBI;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Sector );
use DBM::Deep::Sector::DBI::Reference ();
use DBM::Deep::Sector::DBI::Scalar ();
# Release this sector by deleting its backing row; which table that is
# depends on the subclass's table().
sub free {
    my $self = shift;
    $self->engine->storage->delete_from( $self->table, $self->offset );
}
# Refresh this sector's state from storage by re-running initialisation.
sub reload {
    my $self = shift;
    return $self->_init;
}
# Build the appropriate sector object for $offset.
# With no type, or type 'refs', a Reference sector is returned. With
# 'datas', a Scalar sector is built; if the stored datum is itself a
# reference ('R'), the pointer is chased and the target Reference sector
# is returned instead.
sub load {
    my $self = shift;
    my ($engine, $offset, $type) = @_;

    if ( !defined $type || $type eq 'refs' ) {
        return DBM::Deep::Sector::DBI::Reference->new({
            engine => $engine,
            offset => $offset,
        });
    }
    elsif ( $type eq 'datas' ) {
        my $sector = DBM::Deep::Sector::DBI::Scalar->new({
            engine => $engine,
            offset => $offset,
        });

        if ( $sector->{data_type} eq 'R' ) {
            # The scalar row points at a refs row; follow it.
            return $self->load(
                $engine, $sector->{value}, 'refs',
            );
        }

        return $sector;
    }

    DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" );
}
1;
__END__

View File

@@ -0,0 +1,238 @@
package DBM::Deep::Sector::DBI::Reference;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base 'DBM::Deep::Sector::DBI';
use Scalar::Util;
# Reference sectors live in the 'refs' table.
sub table { 'refs' }

# Create the backing row for a new reference (capturing the class the
# data was blessed into, if any), or recover ref_type for an existing one.
sub _init {
    my $self = shift;
    # NOTE(review): $e is assigned but unused below — candidate for removal.
    my $e = $self->engine;
    unless ( $self->offset ) {
        # Fresh sector: insert a row and remember its id as our offset.
        my $classname = Scalar::Util::blessed( delete $self->{data} );
        $self->{offset} = $self->engine->storage->write_to(
            refs => undef,
            ref_type  => $self->type,
            classname => $classname,
        );
    }
    else {
        # Existing sector: read the stored ref_type back into the object.
        my ($rows) = $self->engine->storage->read_from(
            refs => $self->offset,
            qw( ref_type ),
        );
        $self->{type} = $rows->[0]{ref_type};
    }
    return;
}
# Return the sector holding the value stored under $args->{key} within
# this reference, or nothing if the key has no row.
sub get_data_for {
    my $self = shift;
    my ($args) = @_;
    my ($rows) = $self->engine->storage->read_from(
        datas => { ref_id => $self->offset, key => $args->{key} },
        qw( id ),
    );
    return unless $rows->[0]{id};
    $self->load(
        $self->engine,
        $rows->[0]{id},
        'datas',
    );
}
# Store a child sector under $args->{key}. Plain scalars are written (or
# updated) in place; references are recorded as an 'R' row whose value
# is the child's offset in the refs table.
sub write_data {
    my $self = shift;
    my ($args) = @_;
    if ( ( $args->{value}->type || 'S' ) eq 'S' ) {
        # Scalar payload: upsert the datas row, then refresh the value
        # sector's cached fields from what was actually written.
        $args->{value}{offset} = $self->engine->storage->write_to(
            datas => $args->{value}{offset},
            ref_id    => $self->offset,
            data_type => 'S',
            key       => $args->{key},
            value     => $args->{value}{data},
        );
        $args->{value}->reload;
    }
    else {
        # Write the Scalar of the Reference
        $self->engine->storage->write_to(
            datas => undef,
            ref_id    => $self->offset,
            data_type => 'R',
            key       => $args->{key},
            value     => $args->{value}{offset},
        );
    }
}
# Remove $args->{key} from this reference, returning the deleted value
# exported to a plain Perl structure (undef when the key was absent).
sub delete_key {
    my ($self, $args) = @_;

    my $prior = $self->get_data_for({
        key => $args->{key},
    });

    my $exported;
    if ( $prior ) {
        # Export first, then drop the row, then release the old sector.
        $exported = $prior->data({ export => 1 });
        $self->engine->storage->delete_from(
            'datas',
            { ref_id => $self->offset, key => $args->{key} },
        );
        $prior->free;
    }

    return $exported;
}
# Name of the class this reference was blessed into, if any.
sub get_classname {
    my $self = shift;
    my ($rows) = $self->engine->storage->read_from(
        'refs', $self->offset, qw( classname ),
    );
    return @$rows ? $rows->[0]{classname} : ();
}
# Look to hoist this method into a ::Reference trait
# Return the DBM::Deep wrapper object for this reference, creating and
# caching one if needed. With export => 1 (and no other holders), return
# a plain Perl structure instead.
sub data {
    my $self = shift;
    my ($args) = @_;
    $args ||= {};
    my $engine = $self->engine;
    my $cache = $engine->cache;
    my $off = $self->offset;
    my $obj;
    # The cache maps offset -> weak wrapper so repeated lookups of the
    # same reference share one object.
    if ( !defined $cache->{ $off } ) {
        $obj = DBM::Deep->new({
            type        => $self->type,
            base_offset => $self->offset,
            storage     => $engine->storage,
            engine      => $engine,
        });
        $cache->{$off} = $obj;
        # Weaken so the cache alone does not keep the wrapper alive.
        Scalar::Util::weaken($cache->{$off});
    }
    else {
        $obj = $cache->{$off};
    }
    # We're not exporting, so just return.
    unless ( $args->{export} ) {
        if ( $engine->storage->{autobless} ) {
            my $classname = $self->get_classname;
            if ( defined $classname ) {
                bless $obj, $classname;
            }
        }
        return $obj;
    }
    # We shouldn't export if this is still referred to.
    if ( $self->get_refcount > 1 ) {
        return $obj;
    }
    return $obj->export;
}
# Drop one reference; once the count reaches zero, invalidate any live
# wrapper object and purge this reference's rows from the database.
sub free {
    my $self = shift;

    # We're not ready to be removed yet.
    return if $self->decrement_refcount > 0;

    # Rebless the object into DBM::Deep::Null.
    # In external_refs mode, this will already have been removed from
    # the cache, so we can skip this.
    my $e = $self->engine;
    if(!$e->{external_refs}) {
        # Empty the cached wrapper in place (evals: it may be tied to a
        # hash or to an array, or already gone) before making it inert.
        eval { %{ $e->cache->{ $self->offset } } = (); };
        eval { @{ $e->cache->{ $self->offset } } = (); };
        bless $e->cache->{ $self->offset }, 'DBM::Deep::Null';
        delete $e->cache->{ $self->offset };
    }

    # Remove this reference's own entries, plus any 'R' rows in other
    # aggregates that pointed at it.
    $e->storage->delete_from(
        'datas', { ref_id => $self->offset },
    );
    $e->storage->delete_from(
        'datas', { value => $self->offset, data_type => 'R' },
    );
    $self->SUPER::free( @_ );
}
# Bump the persisted reference count up by one; returns the new count.
sub increment_refcount {
    my $self = shift;
    my $count = $self->get_refcount + 1;
    $self->write_refcount( $count );
    return $count;
}

# Drop the persisted reference count by one; returns the new count.
sub decrement_refcount {
    my $self = shift;
    my $count = $self->get_refcount - 1;
    $self->write_refcount( $count );
    return $count;
}
# Read the current reference count from this sector's row.
sub get_refcount {
    my $self = shift;
    my ($rows) = $self->engine->storage->read_from(
        'refs', $self->offset, qw( refcount ),
    );
    return $rows->[0]{refcount};
}

# Persist a new reference count via a direct UPDATE on the refs table.
sub write_refcount {
    my ($self, $num) = @_;
    my $dbh = $self->engine->storage->{dbh};
    $dbh->do(
        "UPDATE refs SET refcount = ? WHERE id = ?", undef,
        $num, $self->offset,
    );
}
# Empty the aggregate this sector represents by building a transient
# DBM::Deep handle over the same offset and clearing through it.
sub clear {
    my $self = shift;

    my $handle = DBM::Deep->new({
        type        => $self->type,
        base_offset => $self->offset,
        storage     => $self->engine->storage,
        engine      => $self->engine,
    });
    $handle->_clear;

    return;
}
1;
__END__

View File

@@ -0,0 +1,31 @@
package DBM::Deep::Sector::DBI::Scalar;

# Consistency fix: every sibling sector package declares the minimum
# perl this distribution supports; this one was missing it.
use 5.008_004;

use strict;
use warnings FATAL => 'all';

use base qw( DBM::Deep::Sector::DBI );

# Scalar sectors live in the 'datas' table.
sub table { 'datas' }

# When wrapping an existing row (offset set), copy its data_type, key,
# and value columns into the object. New rows are created elsewhere.
sub _init {
    my $self = shift;
    if ( $self->offset ) {
        my ($rows) = $self->engine->storage->read_from(
            datas => $self->offset,
            qw( id data_type key value ),
        );
        $self->{$_} = $rows->[0]{$_} for qw( data_type key value );
    }
    return;
}

# The stored scalar value itself.
sub data {
    my $self = shift;
    $self->{value};
}

1;
__END__

View File

@@ -0,0 +1,104 @@
package DBM::Deep::Sector::File;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Sector );
use DBM::Deep::Sector::File::BucketList ();
use DBM::Deep::Sector::File::Index ();
use DBM::Deep::Sector::File::Null ();
use DBM::Deep::Sector::File::Reference ();
use DBM::Deep::Sector::File::Scalar ();
my $STALE_SIZE = 2;
# Bytes of per-sector header: the type signature plus the staleness
# counter that follows it.
sub base_size {
    my $self = shift;
    my $engine = $self->engine;
    return $engine->SIG_SIZE + $STALE_SIZE;
}
# Subclasses must name the engine method that returns their extent to
# the appropriate free-list.
sub free_meth { die "free_meth must be implemented in a child class" }

# Mark this sector free on disk (signature overwritten, payload zeroed)
# and hand its extent back to the engine's free-list.
sub free {
    my $self = shift;
    my $e = $self->engine;
    $e->storage->print_at( $self->offset, $e->SIG_FREE );
    # Skip staleness counter
    $e->storage->print_at( $self->offset + $self->base_size,
        chr(0) x ($self->size - $self->base_size),
    );
    my $free_meth = $self->free_meth;
    $e->$free_meth( $self->offset, $self->size );
    return;
}
#=head2 load( $offset )
#
#This will instantiate and return the sector object that represents the data
#found at $offset.
#
#=cut
sub load {
    my $self = shift;
    my ($engine, $offset) = @_;

    # Add a catch for offset of 0 or 1
    return if !$offset || $offset <= 1;

    # The first byte at $offset is the sector-type signature; dispatch on it.
    my $type = $engine->storage->read_at( $offset, 1 );
    return if $type eq chr(0);

    if ( $type eq $engine->SIG_ARRAY || $type eq $engine->SIG_HASH ) {
        return DBM::Deep::Sector::File::Reference->new({
            engine => $engine,
            type   => $type,
            offset => $offset,
        });
    }
    # XXX Don't we need key_md5 here?
    elsif ( $type eq $engine->SIG_BLIST ) {
        return DBM::Deep::Sector::File::BucketList->new({
            engine => $engine,
            type   => $type,
            offset => $offset,
        });
    }
    elsif ( $type eq $engine->SIG_INDEX ) {
        return DBM::Deep::Sector::File::Index->new({
            engine => $engine,
            type   => $type,
            offset => $offset,
        });
    }
    elsif ( $type eq $engine->SIG_NULL ) {
        return DBM::Deep::Sector::File::Null->new({
            engine => $engine,
            type   => $type,
            offset => $offset,
        });
    }
    # Both plain and UTF-8 data sectors are handled by the Scalar class.
    elsif ( $type eq $engine->SIG_DATA || $type eq $engine->SIG_UNIDATA ) {
        return DBM::Deep::Sector::File::Scalar->new({
            engine => $engine,
            type   => $type,
            offset => $offset,
        });
    }
    # This was deleted from under us, so just return and let the caller figure it out.
    elsif ( $type eq $engine->SIG_FREE ) {
        return;
    }

    DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" );
}
1;
__END__

View File

@@ -0,0 +1,376 @@
package DBM::Deep::Sector::File::BucketList;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Sector::File );
my $STALE_SIZE = 2;
# Please refer to the pack() documentation for further information
my %StP = (
1 => 'C', # Unsigned char value (no order needed as it's just one byte)
2 => 'n', # Unsigned short in "network" (big-endian) order
4 => 'N', # Unsigned long in "network" (big-endian) order
8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
);
# Allocate and zero a new bucket-list sector when no offset was given;
# then, if a key hash was supplied, seat the cursor on that key.
sub _init {
    my $self = shift;
    my $engine = $self->engine;
    unless ( $self->offset ) {
        my $leftover = $self->size - $self->base_size;
        $self->{offset} = $engine->_request_blist_sector( $self->size );
        $engine->storage->print_at( $self->offset, $engine->SIG_BLIST ); # Sector type
        # Skip staleness counter
        $engine->storage->print_at( $self->offset + $self->base_size,
            chr(0) x $leftover, # Zero-fill the data
        );
    }
    if ( $self->{key_md5} ) {
        $self->find_md5;
    }
    return $self;
}
# Zero out every bucket, leaving the sector header intact.
sub wipe {
    my $self = shift;
    my $payload = $self->size - $self->base_size;
    $self->engine->storage->print_at(
        $self->offset + $self->base_size,
        chr(0) x $payload, # Zero-fill the data
    );
}
# Total sector size: the header plus one slot per allowed bucket.
# Computed once and cached on the object.
sub size {
    my $self = shift;
    $self->{size} ||= do {
        my $e = $self->engine;
        # Base + numbuckets * bucketsize
        $self->base_size + $e->max_buckets * $self->bucket_size;
    };
    return $self->{size};
}
# Engine method that reclaims a freed bucket-list sector.
sub free_meth { '_add_free_blist_sector' }

# Free every sector referenced from this bucket list — each bucket's key
# sector, its HEAD value sector, and any per-transaction value sectors —
# then free the list itself.
sub free {
    my $self = shift;
    my $e = $self->engine;
    foreach my $bucket ( $self->chopped_up ) {
        my $rest = $bucket->[-1];
        # Delete the keysector
        my $l = unpack( $StP{$e->byte_size}, substr( $rest, $e->hash_size, $e->byte_size ) );
        my $s = $e->load_sector( $l ); $s->free if $s;
        # Delete the HEAD sector
        $l = unpack( $StP{$e->byte_size},
            substr( $rest,
                $e->hash_size + $e->byte_size,
                $e->byte_size,
            ),
        );
        $s = $e->load_sector( $l ); $s->free if $s;
        # One slot per transaction beyond the HEAD — hence num_txns - 2.
        foreach my $txn ( 0 .. $e->num_txns - 2 ) {
            my $l = unpack( $StP{$e->byte_size},
                substr( $rest,
                    $e->hash_size + 2 * $e->byte_size + $txn * ($e->byte_size + $STALE_SIZE),
                    $e->byte_size,
                ),
            );
            my $s = $e->load_sector( $l ); $s->free if $s;
        }
    }
    $self->SUPER::free();
}
# Bytes per bucket: the key hash, the HEAD location, and one
# (location, staleness) pair per non-HEAD transaction. Cached.
sub bucket_size {
    my $self = shift;
    unless ( $self->{bucket_size} ) {
        my $e = $self->engine;
        my $txn_slots = ($e->num_txns - 1) * ($e->byte_size + $STALE_SIZE);
        my $locations = $e->byte_size + $e->byte_size + $txn_slots;
        $self->{bucket_size} = $e->hash_size + $locations;
    }
    return $self->{bucket_size};
}
# XXX This is such a poor hack. I need to rethink this code.
# Return the occupied buckets as [ file_offset, md5 . payload ] pairs,
# stopping at the first never-used (blank-md5) slot.
sub chopped_up {
    my $self = shift;
    my $e = $self->engine;
    my @buckets;
    foreach my $idx ( 0 .. $e->max_buckets - 1 ) {
        my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size;
        my $md5 = $e->storage->read_at( $spot, $e->hash_size );
        #XXX If we're chopping, why would we ever have the blank_md5?
        last if $md5 eq $e->blank_md5;
        # NOTE(review): read_at with an undef offset appears to continue
        # from the current file position — confirm against the storage layer.
        my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size );
        push @buckets, [ $spot, $md5 . $rest ];
    }
    return @buckets;
}
# Write $entry into the next never-written bucket slot (tracked on the
# object, not on disk) and return the file offset it was written at.
sub write_at_next_open {
    my $self = shift;
    my ($entry) = @_;
    #XXX This is such a hack!
    $self->{_next_open} = 0 unless exists $self->{_next_open};
    my $spot = $self->offset + $self->base_size + $self->{_next_open}++ * $self->bucket_size;
    $self->engine->storage->print_at( $spot, $entry );
    return $spot;
}
# True when the current key_md5 exists in this list; runs find_md5
# lazily the first time it is asked.
sub has_md5 {
    my $self = shift;
    $self->find_md5 unless exists $self->{found};
    return $self->{found};
}
# Linear-scan the buckets for $self->{key_md5}. Sets $self->{found}
# (true when the key exists) and $self->{idx} — the matching slot, the
# first blank slot, or -1 when the list is full with no match.
sub find_md5 {
    my $self = shift;
    $self->{found} = undef;
    $self->{idx} = -1;
    if ( @_ ) {
        $self->{key_md5} = shift;
    }
    # If we don't have an MD5, then what are we supposed to do?
    unless ( exists $self->{key_md5} ) {
        DBM::Deep->_throw_error( "Cannot find_md5 without a key_md5 set" );
    }
    my $e = $self->engine;
    foreach my $idx ( 0 .. $e->max_buckets - 1 ) {
        my $potential = $e->storage->read_at(
            $self->offset + $self->base_size + $idx * $self->bucket_size, $e->hash_size,
        );
        # A blank slot ends the scan: the key is absent, and this is
        # where it would be inserted.
        if ( $potential eq $e->blank_md5 ) {
            $self->{idx} = $idx;
            return;
        }
        if ( $potential eq $self->{key_md5} ) {
            $self->{found} = 1;
            $self->{idx} = $idx;
            return;
        }
    }
    return;
}
# Write a value location into the bucket seated by find_md5. For a brand
# new key, also create the key sector and write md5 + key location. The
# value goes into the HEAD slot, or into the transaction's slot (paired
# with the current staleness counter) when inside a transaction.
sub write_md5 {
    my $self = shift;
    my ($args) = @_;
    DBM::Deep->_throw_error( "write_md5: no key" ) unless exists $args->{key};
    DBM::Deep->_throw_error( "write_md5: no key_md5" ) unless exists $args->{key_md5};
    DBM::Deep->_throw_error( "write_md5: no value" ) unless exists $args->{value};
    my $engine = $self->engine;
    $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id};
    my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
    # Record the write so the transaction machinery can track this bucket.
    $engine->add_entry( $args->{trans_id}, $spot );
    unless ($self->{found}) {
        # New key: persist the key string and write md5 + key location.
        my $key_sector = DBM::Deep::Sector::File::Scalar->new({
            engine => $engine,
            data => $args->{key},
        });
        $engine->storage->print_at( $spot,
            $args->{key_md5},
            pack( $StP{$engine->byte_size}, $key_sector->offset ),
        );
    }
    my $loc = $spot
        + $engine->hash_size
        + $engine->byte_size;
    if ( $args->{trans_id} ) {
        $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE );
        $engine->storage->print_at( $loc,
            pack( $StP{$engine->byte_size}, $args->{value}->offset ),
            pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ),
        );
    }
    else {
        $engine->storage->print_at( $loc,
            pack( $StP{$engine->byte_size}, $args->{value}->offset ),
        );
    }
}
# Mark the seated bucket's value slot as deleted (location 1 is the
# deletion marker) for the given transaction, or for the HEAD.
sub mark_deleted {
    my $self = shift;
    my ($args) = @_;
    $args ||= {};
    my $engine = $self->engine;
    $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id};
    my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
    $engine->add_entry( $args->{trans_id}, $spot );
    my $loc = $spot
        + $engine->hash_size
        + $engine->byte_size;
    if ( $args->{trans_id} ) {
        $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE );
        $engine->storage->print_at( $loc,
            pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted
            pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ),
        );
    }
    else {
        $engine->storage->print_at( $loc,
            pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted
        );
    }
}
# Physically remove the seated bucket: shift the later buckets down over
# it, free its key and data sectors, and return the exported old value.
sub delete_md5 {
    my $self = shift;
    my ($args) = @_;
    my $engine = $self->engine;
    return undef unless $self->{found};
    # Save the location so that we can free the data
    my $location = $self->get_data_location_for({
        allow_head => 0,
    });
    my $key_sector = $self->get_key_for;
    my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
    # Compact: copy the trailing buckets down one slot and blank the last.
    $engine->storage->print_at( $spot,
        $engine->storage->read_at(
            $spot + $self->bucket_size,
            $self->bucket_size * ( $engine->max_buckets - $self->{idx} - 1 ),
        ),
        chr(0) x $self->bucket_size,
    );
    $key_sector->free;
    my $data_sector = $self->engine->load_sector( $location );
    my $data = $data_sector->data({ export => 1 });
    $data_sector->free;
    return $data;
}
# Resolve the value location for a bucket, honoring transactions: a
# stale per-transaction entry is wiped, and a transaction with no entry
# of its own may fall back to the HEAD when allow_head is set. Returns
# 0 when there is no live location (1 is the deletion marker).
sub get_data_location_for {
    my $self = shift;
    my ($args) = @_;
    $args ||= {};
    $args->{allow_head} = 0 unless exists $args->{allow_head};
    $args->{trans_id} = $self->engine->trans_id unless exists $args->{trans_id};
    $args->{idx} = $self->{idx} unless exists $args->{idx};
    my $e = $self->engine;
    my $spot = $self->offset + $self->base_size
        + $args->{idx} * $self->bucket_size
        + $e->hash_size
        + $e->byte_size;
    if ( $args->{trans_id} ) {
        $spot += $e->byte_size + ($args->{trans_id} - 1) * ( $e->byte_size + $STALE_SIZE );
    }
    my $buffer = $e->storage->read_at(
        $spot,
        $e->byte_size + $STALE_SIZE,
    );
    my ($loc, $staleness) = unpack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, $buffer );
    # XXX Merge the two if-clauses below
    if ( $args->{trans_id} ) {
        # We have found an entry that is old, so get rid of it
        if ( $staleness != (my $s = $e->get_txn_staleness_counter( $args->{trans_id} ) ) ) {
            $e->storage->print_at(
                $spot,
                pack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
            );
            $loc = 0;
        }
    }
    # If we're in a transaction and we never wrote to this location, try the
    # HEAD instead.
    if ( $args->{trans_id} && !$loc && $args->{allow_head} ) {
        return $self->get_data_location_for({
            trans_id => 0,
            allow_head => 1,
            idx => $args->{idx},
        });
    }
    return $loc <= 1 ? 0 : $loc;
}
# Load the value sector for the bucket seated by find_md5, or return
# nothing when the key was not found.
sub get_data_for {
    my ($self, $args) = @_;
    $args ||= {};

    return unless $self->{found};
    my $loc = $self->get_data_location_for({
        allow_head => $args->{allow_head},
    });
    return $self->engine->load_sector( $loc );
}
# Load the scalar sector holding the key string for bucket $idx,
# defaulting to the slot find_md5 last seated.
sub get_key_for {
    my $self = shift;
    my ($idx) = @_;
    $idx = $self->{idx} unless defined $idx;
    if ( $idx >= $self->engine->max_buckets ) {
        DBM::Deep->_throw_error( "get_key_for(): Attempting to retrieve $idx" );
    }
    my $location = $self->engine->storage->read_at(
        $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size,
        $self->engine->byte_size,
    );
    $location = unpack( $StP{$self->engine->byte_size}, $location );
    DBM::Deep->_throw_error( "get_key_for: No location?" ) unless $location;
    return $self->engine->load_sector( $location );
}
1;
__END__

View File

@@ -0,0 +1,15 @@
package DBM::Deep::Sector::File::Data;

use 5.008_004;

use strict;
use warnings FATAL => 'all';

use base qw( DBM::Deep::Sector::File );

# Size in bytes of a data sector, as configured on the engine.
sub size {
    my $self = shift;
    return $self->{engine}->data_sector_size;
}

# Engine method that reclaims a freed data sector.
sub free_meth { return '_add_free_data_sector' }

1;
__END__

View File

@@ -0,0 +1,98 @@
package DBM::Deep::Sector::File::Index;

# Consistency fix: every sibling sector package declares the minimum
# perl this distribution supports; this one was missing it.
use 5.008_004;

use strict;
use warnings FATAL => 'all';

use base qw( DBM::Deep::Sector::File );

my $STALE_SIZE = 2;

# Please refer to the pack() documentation for further information
my %StP = (
    1 => 'C', # Unsigned char value (no order needed as it's just one byte)
    2 => 'n', # Unsigned short in "network" (big-endian) order
    4 => 'N', # Unsigned long in "network" (big-endian) order
    8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
);

# Allocate and zero-fill a brand-new index sector unless an existing
# offset was supplied to wrap.
sub _init {
    my $self = shift;
    my $engine = $self->engine;
    unless ( $self->offset ) {
        my $leftover = $self->size - $self->base_size;
        $self->{offset} = $engine->_request_index_sector( $self->size );
        $engine->storage->print_at( $self->offset, $engine->SIG_INDEX ); # Sector type
        # Skip staleness counter
        $engine->storage->print_at( $self->offset + $self->base_size,
            chr(0) x $leftover, # Zero-fill the rest
        );
    }
    return $self;
}

#XXX Change here
# Total sector size: the header plus one location slot per possible hash
# character. Computed once and cached.
sub size {
    my $self = shift;
    unless ( $self->{size} ) {
        my $e = $self->engine;
        $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars;
    }
    return $self->{size};
}

# Engine method that reclaims a freed index sector.
sub free_meth { return '_add_free_index_sector' }

# Free every child sector this index points at, then the index itself.
sub free {
    my $self = shift;
    my $e = $self->engine;
    for my $i ( 0 .. $e->hash_chars - 1 ) {
        my $l = $self->get_entry( $i ) or next;
        $e->load_sector( $l )->free;
    }
    $self->SUPER::free();
}

# File offset of slot $idx within this index sector.
sub _loc_for {
    my $self = shift;
    my ($idx) = @_;
    return $self->offset + $self->base_size + $idx * $self->engine->byte_size;
}

# Read the child location stored in slot $idx (0 means "no child").
sub get_entry {
    my $self = shift;
    my ($idx) = @_;
    my $e = $self->engine;
    DBM::Deep->_throw_error( "get_entry: Out of range ($idx)" )
        if $idx < 0 || $idx >= $e->hash_chars;
    return unpack(
        $StP{$e->byte_size},
        $e->storage->read_at( $self->_loc_for( $idx ), $e->byte_size ),
    );
}

# Write child location $loc into slot $idx.
sub set_entry {
    my $self = shift;
    my ($idx, $loc) = @_;
    my $e = $self->engine;
    DBM::Deep->_throw_error( "set_entry: Out of range ($idx)" )
        if $idx < 0 || $idx >= $e->hash_chars;
    $self->engine->storage->print_at(
        $self->_loc_for( $idx ),
        pack( $StP{$e->byte_size}, $loc ),
    );
}

1;
__END__

View File

@@ -0,0 +1,46 @@
package DBM::Deep::Sector::File::Null;

use 5.008_004;

use strict;
use warnings FATAL => 'all';

use base qw( DBM::Deep::Sector::File::Data );

my $STALE_SIZE = 2;

# pack() templates keyed by byte width; see the pack() documentation.
my %StP = (
    1 => 'C', # Unsigned char value (no order needed as it's just one byte)
    2 => 'n', # Unsigned short in "network" (big-endian) order
    4 => 'N', # Unsigned long in "network" (big-endian) order
    8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
);

# A null sector carries no payload.
sub type { $_[0]{engine}->SIG_NULL }
sub data_length { 0 }
sub data { return }

# Allocate and write out a fresh null sector on first use; a sector
# constructed over an existing offset needs no initialization.
sub _init {
    my $self = shift;
    my $engine = $self->engine;

    return if $self->offset;

    my $leftover = $self->size - $self->base_size - 1 * $engine->byte_size - 1;

    $self->{offset} = $engine->_request_data_sector( $self->size );
    $engine->storage->print_at( $self->offset, $self->type ); # Sector type
    # Staleness counter is skipped over.
    $engine->storage->print_at( $self->offset + $self->base_size,
        pack( $StP{$engine->byte_size}, 0 ), # Chain loc
        pack( $StP{1}, $self->data_length ), # Data length
        chr(0) x $leftover,                  # Zero-fill the rest
    );
    return;
}

1;
__END__

View File

@@ -0,0 +1,564 @@
package DBM::Deep::Sector::File::Reference;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base qw( DBM::Deep::Sector::File::Data );
use Scalar::Util;
my $STALE_SIZE = 2;
# Please refer to the pack() documentation for further information
my %StP = (
1 => 'C', # Unsigned char value (no order needed as it's just one byte)
2 => 'n', # Unsigned short in "network" (big-endian) order
4 => 'N', # Unsigned long in "network" (big-endian) order
8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
);
# For a fresh sector: persist the classname (when the data was blessed),
# claim a data sector, and write the header — index/blist pointer,
# classname pointer, refcount = 1. For an existing offset: read back the
# type signature. Either way, pick up the staleness counter that sits
# just after the signature.
sub _init {
    my $self = shift;
    my $e = $self->engine;
    unless ( $self->offset ) {
        my $classname = Scalar::Util::blessed( delete $self->{data} );
        my $leftover = $self->size - $self->base_size - 3 * $e->byte_size;
        my $class_offset = 0;
        if ( defined $classname ) {
            my $class_sector = DBM::Deep::Sector::File::Scalar->new({
                engine => $e,
                data => $classname,
            });
            $class_offset = $class_sector->offset;
        }
        $self->{offset} = $e->_request_data_sector( $self->size );
        $e->storage->print_at( $self->offset, $self->type ); # Sector type
        # Skip staleness counter
        $e->storage->print_at( $self->offset + $self->base_size,
            pack( $StP{$e->byte_size}, 0 ), # Index/BList loc
            pack( $StP{$e->byte_size}, $class_offset ), # Classname loc
            pack( $StP{$e->byte_size}, 1 ), # Initial refcount
            chr(0) x $leftover, # Zero-fill the rest
        );
    }
    else {
        $self->{type} = $e->storage->read_at( $self->offset, 1 );
    }
    $self->{staleness} = unpack(
        $StP{$STALE_SIZE},
        $e->storage->read_at( $self->offset + $e->SIG_SIZE, $STALE_SIZE ),
    );
    return;
}
# Resolve the on-disk location of the value stored under key_md5, via
# the bucket list. Returns nothing when the key (or the list) is absent.
sub get_data_location_for {
    my $self = shift;
    my ($args) = @_;

    # Assume that the head is not allowed unless otherwise specified.
    $args->{allow_head} = 0 unless exists $args->{allow_head};

    # Assume we don't create a new blist location unless otherwise specified.
    $args->{create} = 0 unless exists $args->{create};

    my $blist = $self->get_bucket_list({
        key_md5 => $args->{key_md5},
        key => $args->{key},
        create => $args->{create},
    });
    return unless $blist && $blist->{found};

    # At this point, $blist knows where the md5 is. What it -doesn't- know yet
    # is whether or not this transaction has this key. That's part of the next
    # function call.
    my $location = $blist->get_data_location_for({
        allow_head => $args->{allow_head},
    }) or return;

    return $location;
}
# Load the value sector stored under the key described by $args, or
# return nothing when it does not exist.
sub get_data_for {
    my ($self, $args) = @_;
    my $loc = $self->get_data_location_for( $args ) or return;
    return $self->engine->load_sector( $loc );
}
# Store $args->{value} under $args->{key}, creating the bucket list as
# needed and doing the transactional bookkeeping required to keep other
# running transactions isolated from this write.
sub write_data {
    my $self = shift;
    my ($args) = @_;

    my $blist = $self->get_bucket_list({
        key_md5 => $args->{key_md5},
        key => $args->{key},
        create => 1,
    }) or DBM::Deep->_throw_error( "How did write_data fail (no blist)?!" );

    # Handle any transactional bookkeeping.
    if ( $self->engine->trans_id ) {
        # Writing inside a transaction to a key with no HEAD entry: mark
        # the HEAD slot deleted so the HEAD does not see this key.
        if ( ! $blist->has_md5 ) {
            $blist->mark_deleted({
                trans_id => 0,
            });
        }
    }
    else {
        my @trans_ids = $self->engine->get_running_txn_ids;
        if ( $blist->has_md5 ) {
            if ( @trans_ids ) {
                # Preserve Isolation: give each running transaction its
                # own clone of the value we are about to overwrite.
                my $old_value = $blist->get_data_for;
                foreach my $other_trans_id ( @trans_ids ) {
                    next if $blist->get_data_location_for({
                        trans_id => $other_trans_id,
                        allow_head => 0,
                    });
                    $blist->write_md5({
                        trans_id => $other_trans_id,
                        key => $args->{key},
                        key_md5 => $args->{key_md5},
                        value => $old_value->clone,
                    });
                }
            }
        }
        else {
            if ( @trans_ids ) {
                foreach my $other_trans_id ( @trans_ids ) {
                    #XXX This doesn't seem to possible to ever happen . . .
                    next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 });
                    $blist->mark_deleted({
                        trans_id => $other_trans_id,
                    });
                }
            }
        }
    }

    #XXX Is this safe to do transactionally?
    # Free the place we're about to write to.
    if ( $blist->get_data_location_for({ allow_head => 0 }) ) {
        $blist->get_data_for({ allow_head => 0 })->free;
    }

    $blist->write_md5({
        key => $args->{key},
        key_md5 => $args->{key_md5},
        value => $args->{value},
    });
}
# Delete $args->{key} from this reference, cloning the old value into
# any running transactions first (Isolation), and return the deleted
# value exported to a plain structure when available.
sub delete_key {
    my $self = shift;
    my ($args) = @_;

    # This can return nothing if we are deleting an entry in a hashref that was
    # auto-vivified as part of the delete process. For example:
    #     my $x = {};
    #     delete $x->{foo}{bar};
    my $blist = $self->get_bucket_list({
        key_md5 => $args->{key_md5},
    }) or return;

    # Save the location so that we can free the data
    my $location = $blist->get_data_location_for({
        allow_head => 0,
    });
    my $old_value = $location && $self->engine->load_sector( $location );

    my @trans_ids = $self->engine->get_running_txn_ids;

    # If we're the HEAD and there are running txns, then we need to clone this
    # value to the other transactions to preserve Isolation.
    if ( $self->engine->trans_id == 0 ) {
        if ( @trans_ids ) {
            foreach my $other_trans_id ( @trans_ids ) {
                next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 });
                $blist->write_md5({
                    trans_id => $other_trans_id,
                    key => $args->{key},
                    key_md5 => $args->{key_md5},
                    value => $old_value->clone,
                });
            }
        }
    }

    my $data;
    if ( @trans_ids ) {
        # Other transactions still reference the bucket, so only mark it
        # deleted for this transaction rather than removing it.
        $blist->mark_deleted( $args );
        if ( $old_value ) {
            #XXX Is this export => 1 actually doing anything?
            $data = $old_value->data({ export => 1 });
            $old_value->free;
        }
    }
    else {
        $data = $blist->delete_md5( $args );
    }

    return $data;
}
# Record $loc as the offset of this reference's index/bucket-list chain.
sub write_blist_loc {
    my ($self, $loc) = @_;
    my $engine = $self->engine;
    my $where = $self->offset + $self->base_size;
    $engine->storage->print_at(
        $where,
        pack( $StP{$engine->byte_size}, $loc ),
    );
}
# Offset of this reference's index/bucket-list chain (0 when none yet).
sub get_blist_loc {
    my $self = shift;
    my $e = $self->engine;
    my $raw = $e->storage->read_at( $self->offset + $self->base_size, $e->byte_size );
    return unpack( $StP{$e->byte_size}, $raw );
}
# Walk (and optionally grow) the index/bucket-list chain to find the
# bucket list responsible for $args->{key_md5}. When a bucket list
# overflows and create is set, it is split under a new index sector and
# its entries reindexed; the double-brace/redo construct repeats the
# split when all entries land in a single child list.
sub get_bucket_list {
    my $self = shift;
    my ($args) = @_;
    $args ||= {};

    # XXX Add in check here for recycling?

    my $engine = $self->engine;

    my $blist_loc = $self->get_blist_loc;

    # There's no index or blist yet
    unless ( $blist_loc ) {
        return unless $args->{create};

        my $blist = DBM::Deep::Sector::File::BucketList->new({
            engine => $engine,
            key_md5 => $args->{key_md5},
        });

        $self->write_blist_loc( $blist->offset );
#        $engine->storage->print_at( $self->offset + $self->base_size,
#            pack( $StP{$engine->byte_size}, $blist->offset ),
#        );

        return $blist;
    }

    my $sector = $engine->load_sector( $blist_loc )
        or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
    # $i is the md5 character used to choose a child at each index level.
    my $i = 0;
    my $last_sector = undef;
    while ( $sector->isa( 'DBM::Deep::Sector::File::Index' ) ) {
        $blist_loc = $sector->get_entry( ord( substr( $args->{key_md5}, $i++, 1 ) ) );
        $last_sector = $sector;
        if ( $blist_loc ) {
            $sector = $engine->load_sector( $blist_loc )
                or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
        }
        else {
            $sector = undef;
            last;
        }
    }

    # This means we went through the Index sector(s) and found an empty slot
    unless ( $sector ) {
        return unless $args->{create};

        DBM::Deep->_throw_error( "No last_sector when attempting to build a new entry" )
            unless $last_sector;

        my $blist = DBM::Deep::Sector::File::BucketList->new({
            engine => $engine,
            key_md5 => $args->{key_md5},
        });

        $last_sector->set_entry( ord( substr( $args->{key_md5}, $i - 1, 1 ) ) => $blist->offset );

        return $blist;
    }

    $sector->find_md5( $args->{key_md5} );

    # See whether or not we need to reindex the bucketlist
    # Yes, the double-braces are there for a reason. if() doesn't create a
    # redo-able block, so we have to create a bare block within the if() for
    # redo-purposes.
    # Patch and idea submitted by sprout@cpan.org. -RobK, 2008-01-09
    if ( !$sector->has_md5 && $args->{create} && $sector->{idx} == -1 ) {{
        my $redo;

        my $new_index = DBM::Deep::Sector::File::Index->new({
            engine => $engine,
        });

        my %blist_cache;
        #XXX q.v. the comments for this function.
        foreach my $entry ( $sector->chopped_up ) {
            my ($spot, $md5) = @{$entry};
            my $idx = ord( substr( $md5, $i, 1 ) );

            # XXX This is inefficient
            my $blist = $blist_cache{$idx}
                ||= DBM::Deep::Sector::File::BucketList->new({
                    engine => $engine,
                });

            $new_index->set_entry( $idx => $blist->offset );

            my $new_spot = $blist->write_at_next_open( $md5 );
            $engine->reindex_entry( $spot => $new_spot );
        }

        # Handle the new item separately.
        {
            my $idx = ord( substr( $args->{key_md5}, $i, 1 ) );

            # If all the previous blist's items have been thrown into one
            # blist and the new item belongs in there too, we need
            # another index.
            if ( keys %blist_cache == 1 and each %blist_cache == $idx ) {
                ++$i, ++$redo;
            } else {
                my $blist = $blist_cache{$idx}
                    ||= DBM::Deep::Sector::File::BucketList->new({
                        engine => $engine,
                    });

                $new_index->set_entry( $idx => $blist->offset );

                #XXX THIS IS HACKY!
                $blist->find_md5( $args->{key_md5} );
                $blist->write_md5({
                    key => $args->{key},
                    key_md5 => $args->{key_md5},
                    value => DBM::Deep::Sector::File::Null->new({
                        engine => $engine,
                        data => undef,
                    }),
                });
            }
        }

        # Link the new index into the chain (or into the header when this
        # was the top-level bucket list).
        if ( $last_sector ) {
            $last_sector->set_entry(
                ord( substr( $args->{key_md5}, $i - 1, 1 ) ),
                $new_index->offset,
            );
        } else {
            $engine->storage->print_at( $self->offset + $self->base_size,
                pack( $StP{$engine->byte_size}, $new_index->offset ),
            );
        }

        $sector->wipe;
        $sector->free;

        if ( $redo ) {
            (undef, $sector) = %blist_cache;
            $last_sector = $new_index;
            redo;
        }

        $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) };
        $sector->find_md5( $args->{key_md5} );
    }}

    return $sector;
}
# Offset of the scalar sector holding this reference's classname
# (second slot of the header; 0 when unblessed).
sub get_class_offset {
    my $self = shift;
    my $e = $self->engine;
    my $where = $self->offset + $self->base_size + 1 * $e->byte_size;
    my $raw = $e->storage->read_at( $where, $e->byte_size );
    return unpack( $StP{$e->byte_size}, $raw );
}
# Name of the class this reference was blessed into, if any.
sub get_classname {
    my $self = shift;
    my $class_offset = $self->get_class_offset or return;
    return $self->engine->load_sector( $class_offset )->data;
}
# Look to hoist this method into a ::Reference trait
# Return the DBM::Deep wrapper object for this reference, creating and
# caching one per (offset, transaction) pair. With export => 1 (and no
# other holders), return a plain Perl structure instead.
sub data {
    my $self = shift;
    my ($args) = @_;
    $args ||= {};

    my $engine = $self->engine;
    # Wrappers are cached per offset AND per transaction id, so each
    # transaction sees its own view of the reference.
    my $cache_entry = $engine->cache->{ $self->offset } ||= {};
    my $trans_id = $engine->trans_id;
    my $obj;
    if ( !defined $$cache_entry{ $trans_id } ) {
        $obj = DBM::Deep->new({
            type        => $self->type,
            base_offset => $self->offset,
            staleness   => $self->staleness,
            storage     => $engine->storage,
            engine      => $engine,
        });
        $$cache_entry{ $trans_id } = $obj;
        # Weaken so the cache alone does not keep the wrapper alive.
        Scalar::Util::weaken($$cache_entry{ $trans_id });
    }
    else {
        $obj = $$cache_entry{ $trans_id };
    }

    # We're not exporting, so just return.
    unless ( $args->{export} ) {
        if ( $engine->storage->{autobless} ) {
            my $classname = $self->get_classname;
            if ( defined $classname ) {
                bless $obj, $classname;
            }
        }
        return $obj;
    }

    # We shouldn't export if this is still referred to.
    if ( $self->get_refcount > 1 ) {
        return $obj;
    }

    return $obj->export;
}
# Drop one reference; once the count reaches zero, invalidate the cached
# wrapper for this transaction and free the bucket-list and classname
# sectors before releasing the sector itself.
sub free {
    my $self = shift;

    # We're not ready to be removed yet.
    return if $self->decrement_refcount > 0;

    my $e = $self->engine;

    # Rebless the object into DBM::Deep::Null.
    # In external_refs mode, this will already have been removed from
    # the cache, so we can skip this.
    if(!$e->{external_refs}) {
#    eval { %{ $e->cache->{ $self->offset }{ $e->trans_id } } = (); };
#    eval { @{ $e->cache->{ $self->offset }{ $e->trans_id } } = (); };
        my $cache = $e->cache;
        my $off = $self->offset;
        if( exists $cache->{ $off }
         and exists $cache->{ $off }{ my $trans_id = $e->trans_id } ) {
            bless $cache->{ $off }{ $trans_id }, 'DBM::Deep::Null'
                if defined $cache->{ $off }{ $trans_id };
            delete $cache->{ $off }{ $trans_id };
        }
    }

    my $blist_loc = $self->get_blist_loc;
    $e->load_sector( $blist_loc )->free if $blist_loc;

    my $class_loc = $self->get_class_offset;
    $e->load_sector( $class_loc )->free if $class_loc;

    $self->SUPER::free();
}
# Bump the on-disk reference count up by one; returns the new count.
sub increment_refcount {
    my $self = shift;
    my $count = $self->get_refcount + 1;
    $self->write_refcount( $count );
    return $count;
}

# Drop the on-disk reference count by one; returns the new count.
sub decrement_refcount {
    my $self = shift;
    my $count = $self->get_refcount - 1;
    $self->write_refcount( $count );
    return $count;
}
# Read the reference count from the third slot of the sector header.
sub get_refcount {
    my $self = shift;
    my $e = $self->engine;
    my $where = $self->offset + $self->base_size + 2 * $e->byte_size;
    my $raw = $e->storage->read_at( $where, $e->byte_size );
    return unpack( $StP{$e->byte_size}, $raw );
}
# Write $num into the refcount slot of the sector header.
sub write_refcount {
    my ($self, $num) = @_;
    my $e = $self->engine;
    my $where = $self->offset + $self->base_size + 2 * $e->byte_size;
    $e->storage->print_at(
        $where,
        pack( $StP{$e->byte_size}, $num ),
    );
}
# Empty this reference: detach its bucket-list chain from the header and
# free it. With running transactions, fall back to clearing through the
# wrapper object so transactional state is honored.
sub clear {
    my $self = shift;

    my $blist_loc = $self->get_blist_loc or return;

    my $engine = $self->engine;

    # This won't work with autoblessed items.
    if ($engine->get_running_txn_ids) {
        # ~~~ Temporary; the code below this block needs to be modified to
        # take transactions into account.
        $self->data->_get_self->_clear;
        return;
    }

    my $sector = $engine->load_sector( $blist_loc )
        or DBM::Deep->_throw_error(
           "Cannot read sector at $blist_loc in clear()"
        );

    # Set blist offset to 0
    $engine->storage->print_at( $self->offset + $self->base_size,
        pack( $StP{$engine->byte_size}, 0 ),
    );

    # Free the blist
    $sector->free;

    return;
}
1;
__END__

View File

@@ -0,0 +1,143 @@
package DBM::Deep::Sector::File::Scalar;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
no warnings 'recursion';
use base qw( DBM::Deep::Sector::File::Data );
# NOTE(review): $STALE_SIZE is not referenced anywhere in this package --
# presumably the width in bytes of the staleness counter skipped in
# _init(); confirm before relying on it.
my $STALE_SIZE = 2;

# Map an integer width in bytes to the pack() template for an unsigned
# integer of that width.
# Please refer to the pack() documentation for further information
my %StP = (
    1 => 'C', # Unsigned char value (no order needed as it's just one byte)
    2 => 'n', # Unsigned short in "network" (big-endian) order
    4 => 'N', # Unsigned long in "network" (big-endian) order
    8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
);
# Release this scalar sector and, recursively, every continuation
# sector chained after it.
sub free {
    my $self = shift;
    # Capture the continuation pointer before the base-class free()
    # releases this sector.
    my $next_loc = $self->chain_loc;
    $self->SUPER::free();
    $self->engine->load_sector( $next_loc )->free if $next_loc;
    return;
}
# On first creation (no offset yet), allocate sector(s) and write the
# scalar payload to disk. Values longer than one sector's data area are
# split across a chain of sectors linked by a chain-loc pointer.
sub _init {
    my $self = shift;
    my $engine = $self->engine;
    unless ( $self->offset ) {
        # Usable payload bytes per sector: total size minus the header,
        # the chain pointer, and the one-byte length field.
        my $data_section = $self->size - $self->base_size - $engine->byte_size - 1;
        $self->{offset} = $engine->_request_data_sector( $self->size );
        my $data = delete $self->{data};
        # True when $data contains any code point above 0xFF, i.e. it
        # cannot be stored as raw bytes without encoding.
        my $utf8 = do { no warnings 'utf8'; $data !~ /^[\0-\xff]*\z/ };
        if($utf8){
            # Unicode sectors only exist from DB format version 4 on.
            if($engine->{v} < 4) {
                DBM::Deep->_throw_error(
                    "This database format version is too old for Unicode"
                );
            }
            utf8::encode $data;
            $self->{type} = $engine->SIG_UNIDATA;
        }
        else { $self->{type} = $engine->SIG_DATA; }
        my $dlen = length $data;
        my $continue = 1;
        my $curr_offset = $self->offset;
        while ( $continue ) {
            my $next_offset = 0;
            my ($leftover, $this_len, $chunk);
            if ( $dlen > $data_section ) {
                # More data than fits here: fill this sector completely
                # and chain to a freshly allocated one.
                $leftover = 0;
                $this_len = $data_section;
                $chunk = substr( $data, 0, $this_len );
                $dlen -= $data_section;
                $next_offset = $engine->_request_data_sector( $self->size );
                $data = substr( $data, $this_len );
            }
            else {
                # Final sector of the chain: zero-pad the unused tail.
                $leftover = $data_section - $dlen;
                $this_len = $dlen;
                $chunk = $data;
                $continue = 0;
            }
            $engine->storage->print_at( $curr_offset, $self->type ); # Sector type
            # Skip staleness
            $engine->storage->print_at( $curr_offset + $self->base_size,
                pack( $StP{$engine->byte_size}, $next_offset ), # Chain loc
                pack( $StP{1}, $this_len ), # Data length
                $chunk, # Data to be stored in this sector
                chr(0) x $leftover, # Zero-fill the rest
            );
            $curr_offset = $next_offset;
        }
        return;
    }
}
# Return the payload length of this sector. The one-byte length field
# sits immediately after the chain pointer.
sub data_length {
    my $self = shift;
    my $engine = $self->engine;
    my $raw = $engine->storage->read_at(
        $self->offset + $self->base_size + $engine->byte_size,
        1,
    );
    return unpack( $StP{1}, $raw );
}
# Return the offset of the next sector in this scalar's chain (0 when
# this is the last sector). The pointer is the first field after the
# sector header.
sub chain_loc {
    my $self = shift;
    my $engine = $self->engine;
    my $raw = $engine->storage->read_at(
        $self->offset + $self->base_size,
        $engine->byte_size,
    );
    return unpack( $StP{$engine->byte_size}, $raw );
}
# Reassemble and return the scalar value by walking the sector chain
# and concatenating each sector's payload. Unicode payloads (written
# as SIG_UNIDATA) are decoded back from UTF-8.
sub data {
    my $self   = shift;
    my $engine = $self->engine;
    my $data   = '';
    while (1) {
        my $next = $self->chain_loc;
        $data .= $engine->storage->read_at(
            $self->offset + $self->base_size + $engine->byte_size + 1,
            $self->data_length,
        );
        last unless $next;
        $self = $engine->load_sector($next);
    }
    utf8::decode $data if $self->type eq $engine->SIG_UNIDATA;
    return $data;
}
1;
__END__

View File

@@ -0,0 +1,70 @@
package DBM::Deep::Storage;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
=head1 NAME
DBM::Deep::Storage - abstract base class for storage
=head2 flush()
This flushes the filehandle. This takes no parameters and returns nothing.
=cut
# Abstract method -- concrete storage classes must override this.
sub flush {
    die "flush must be implemented in a child class";
}
=head2 is_writable()
This takes no parameters. It returns a boolean saying if this filehandle is
writable.
Taken from L<http://www.perlmonks.org/?node_id=691054>.
=cut
# Abstract method -- concrete storage classes must override this.
sub is_writable {
    die "is_writable must be implemented in a child class";
}
=head1 LOCKING
This is where the actual locking of the storage medium is performed.
Nested locking is supported.
B<NOTE>: It is unclear what will happen if a read lock is taken, then
a write lock is taken as a nested lock, then the write lock is released.
Currently, the only locking method supported is flock(2). This is a
whole-file lock. In the future, more granular locking may be supported.
The API for that is unclear right now.
The following methods manage the locking status. In all cases, they take
a L<DBM::Deep> object and returns nothing.
=over 4
=item * lock_exclusive( $obj )
Take a lock usable for writing.
=item * lock_shared( $obj )
Take a lock usable for reading.
=item * unlock( $obj )
Releases the last lock taken. If this is the outermost lock, then the
object is actually unlocked.
=back
=cut
# Abstract locking interface -- concrete storage classes must override
# all three of these.
sub lock_exclusive {
    die "lock_exclusive must be implemented in a child class";
}

sub lock_shared {
    die "lock_shared must be implemented in a child class";
}

sub unlock {
    die "unlock must be implemented in a child class";
}
1;
__END__

View File

@@ -0,0 +1,170 @@
package DBM::Deep::Storage::DBI;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use base 'DBM::Deep::Storage';
use DBI;
# Construct a DBI-backed storage object. Accepts either a live database
# handle (dbh) or connection info (dbi => { dsn, username, password,
# connect_args }); in the latter case we connect ourselves.
sub new {
    my $class = shift;
    my ($args) = @_;

    # Recognized parameters and their defaults; anything else in
    # $args is ignored.
    my $self = bless {
        autobless => 1,
        dbh       => undef,
        dbi       => undef,
    }, $class;

    for my $param ( keys %$self ) {
        $self->{$param} = $args->{$param}
            if exists $args->{$param};
    }

    if ( $self->{dbh} ) {
        # Caller supplied a live handle; derive the driver name from it.
        $self->{driver} = lc $self->{dbh}->{Driver}->{Name};
    }
    else {
        $self->open;
    }

    # Foreign keys are turned off by default in SQLite3 (for now)
    #q.v. http://search.cpan.org/~adamk/DBD-SQLite-1.27/lib/DBD/SQLite.pm#Foreign_Keys
    # for more info.
    if ( $self->driver eq 'sqlite' ) {
        $self->{dbh}->do( 'PRAGMA foreign_keys = ON' );
    }

    return $self;
}
# Establish the DBI connection described by $self->{dbi} and record the
# lowercased driver name. No-op if a handle already exists.
sub open {
    my $self = shift;
    return if $self->{dbh};
    $self->{dbh} = DBI->connect(
        $self->{dbi}{dsn}, $self->{dbi}{username}, $self->{dbi}{password}, {
            AutoCommit => 1,
            PrintError => 0,
            RaiseError => 1,
            # Caller-supplied attributes override the defaults above.
            %{ $self->{dbi}{connect_args} || {} },
        },
    ) or die $DBI::error;
    # Should we use the same method as done in new() if passed a $dbh?
    # parse_dsn() returns (scheme, driver, ...); keep only the driver
    # name, lowercased.
    (undef, $self->{driver}) = map defined($_) ? lc($_) : undef, DBI->parse_dsn( $self->{dbi}{dsn} );
    return 1;
}
# Disconnect from the database if we ever connected. Safe to call
# repeatedly; always returns 1.
sub close {
    my $self = shift;
    my $dbh = $self->{dbh};
    $dbh->disconnect if $dbh;
    return 1;
}
# Destructor: disconnect for real instances. Class-name invocations
# (non-reference invocants) are no-ops.
sub DESTROY {
    my ($self) = @_;
    $self->close if ref $self;
}
# Is there a portable way of determining writability to a DBH?
# There is no portable writability probe for a database handle, so we
# optimistically report writable.
sub is_writable {
    my $self = shift;
    return 1;
}
# Locking is a no-op for DBI storage; the database supplies its own
# concurrency control. These exist to satisfy the Storage interface
# and return the invocant, as before.
sub lock_exclusive {
    my $self = shift;
    return $self;
}

sub lock_shared {
    my $self = shift;
    return $self;
}

sub unlock {
    my $self = shift;
    # $self->{dbh}->commit;
    return $self;
}
#sub begin_work {
# my $self = shift;
# $self->{dbh}->begin_work;
#}
#
#sub commit {
# my $self = shift;
# $self->{dbh}->commit;
#}
#
#sub rollback {
# my $self = shift;
# $self->{dbh}->rollback;
#}
# Select @cols from $table for the rows matching $cond. A non-reference
# $cond is shorthand for { id => $cond }. Returns an arrayref of
# hashrefs (one per row).
sub read_from {
    my ($self, $table, $cond, @cols) = @_;

    $cond = { id => $cond } unless ref $cond;

    my @fields = keys %$cond;
    my $where  = join ' AND ', map { "`$_` = ?" } @fields;
    my $select = join '`,`', @cols;

    return $self->{dbh}->selectall_arrayref(
        "SELECT `$select` FROM $table WHERE $where",
        { Slice => {} },
        @{$cond}{@fields},
    );
}
# Nothing to flush for DBI storage; the driver manages its own buffers.
sub flush { return; }
# Upsert row $id into $table with the given column => value pairs.
# REPLACE INTO gives upsert semantics on both SQLite and MySQL.
# Returns the database-assigned row id.
sub write_to {
    my ($self, $table, $id, %args) = @_;

    my @fields = keys %args;

    my $sql =
        "REPLACE INTO $table ( `id`, "
      . join( ',', map { "`$_`" } @fields )
      . ") VALUES ("
      . join( ',', ('?') x (@fields + 1) )
      . ")";

    $self->{dbh}->do( $sql, undef, $id, @args{@fields} );

    return $self->{dbh}->last_insert_id("", "", "", "");
}
# Delete the rows of $table matching $cond. A non-reference $cond is
# shorthand for { id => $cond }.
sub delete_from {
    my ($self, $table, $cond) = @_;

    $cond = { id => $cond } unless ref $cond;

    my @fields = keys %$cond;
    my $where  = join ' AND ', map { "`$_` = ?" } @fields;

    $self->{dbh}->do(
        "DELETE FROM $table WHERE $where",
        undef,
        @{$cond}{@fields},
    );
}
# Accessor for the lowercased DBD driver name (e.g. 'sqlite', 'mysql').
sub driver {
    my ($self) = @_;
    return $self->{driver};
}
# Return the SQL random-number function for the active driver; dies for
# drivers we do not know about.
sub rand_function {
    my $self = shift;
    my %fn_for = (
        sqlite => 'random()',
        mysql  => 'RAND()',
    );
    my $driver = $self->driver;
    return $fn_for{$driver} if exists $fn_for{$driver};
    die "rand_function undefined for $driver\n";
}
1;
__END__

View File

@@ -0,0 +1,399 @@
package DBM::Deep::Storage::File;
use 5.008_004;
use strict;
use warnings FATAL => 'all';
use Fcntl qw( :DEFAULT :flock :seek );
use constant DEBUG => 0;
use base 'DBM::Deep::Storage';
=head1 NAME
DBM::Deep::Storage::File - mediate low-level interaction with storage mechanism
=head1 PURPOSE
This is an internal-use-only object for L<DBM::Deep>. It mediates the low-level
interaction with the storage mechanism.
Currently, the only storage mechanism supported is the file system.
=head1 OVERVIEW
This class provides an abstraction to the storage mechanism so that the Engine
(the only class that uses this class) doesn't have to worry about that.
=head1 METHODS
=head2 new( \%args )
=cut
# Construct a file-backed storage object. Accepts either an open
# filehandle (fh) or a filename (file); in the latter case the file is
# opened (and created if necessary).
sub new {
    my $class = shift;
    my ($args) = @_;

    # Defaults for every parameter we recognize; unknown args are ignored.
    my $self = bless {
        autobless   => 1,
        autoflush   => 1,
        end         => 0,
        fh          => undef,
        file        => undef,
        file_offset => 0,
        locking     => 1,
        locked      => 0,
        #XXX Migrate this to the engine, where it really belongs.
        filter_store_key   => undef,
        filter_store_value => undef,
        filter_fetch_key   => undef,
        filter_fetch_value => undef,
    }, $class;

    for my $param ( keys %$self ) {
        $self->{$param} = $args->{$param}
            if exists $args->{$param};
    }

    # A pre-opened handle establishes the zero point unless the caller
    # supplied an explicit offset.
    if ( $self->{fh} && !$self->{file_offset} ) {
        $self->{file_offset} = tell( $self->{fh} );
    }

    $self->open unless $self->{fh};

    return $self;
}
=head2 open()
This method opens the filehandle for the filename in C< file >.
There is no return value.
=cut
# TODO: What happens if we ->open when we already have a $fh?
# Open (creating if necessary) the file named in $self->{file}, store
# the handle in $self->{fh}, and enable autoflush if configured.
# Dies if sysopen fails. Returns 1.
sub open {
    my $self = shift;
    # Adding O_BINARY should remove the need for the binmode below. However,
    # I'm not going to remove it because I don't have the Win32 chops to be
    # absolutely certain everything will be ok.
    my $flags = O_CREAT | O_BINARY;
    # Read-write when the file is missing (we are about to create it) or
    # writable; read-only otherwise. The bare `_` reuses the stat buffer
    # populated by the -e test.
    if ( !-e $self->{file} || -w _ ) {
        $flags |= O_RDWR;
    }
    else {
        $flags |= O_RDONLY;
    }
    my $fh;
    sysopen( $fh, $self->{file}, $flags )
        or die "DBM::Deep: Cannot sysopen file '$self->{file}': $!\n";
    $self->{fh} = $fh;
    # Even though we use O_BINARY, better be safe than sorry.
    binmode $fh;
    if ($self->{autoflush}) {
        # Turn on autoflush for the new handle without disturbing the
        # currently selected handle.
        my $old = select $fh;
        $|=1;
        select $old;
    }
    return 1;
}
=head2 close()
If the filehandle is opened, this will close it.
There is no return value.
=cut
# Close the underlying filehandle if it is open and forget it.
# Idempotent; always returns 1.
sub close {
    my $self = shift;
    if ( my $fh = $self->{fh} ) {
        close $fh;
        $self->{fh} = undef;
    }
    return 1;
}
=head2 size()
This will return the size of the DB. If file_offset is set, this will take that into account.
B<NOTE>: This function isn't used internally anywhere.
=cut
# Return the size of the DB: the file size minus file_offset. Yields 0
# when no filehandle is open.
sub size {
    my $self = shift;
    my $fh = $self->{fh} or return 0;
    return ( -s $fh ) - $self->{file_offset};
}
=head2 set_inode()
This will set the inode value of the underlying file object.
This is only needed to handle some obscure Win32 bugs. It really shouldn't be
needed outside this object.
There is no return value.
=cut
# Record the inode and current size of the underlying file. Only stats
# once; later calls are no-ops. Always returns 1.
sub set_inode {
    my $self = shift;
    if ( !defined $self->{inode} ) {
        my @st = stat( $self->{fh} );
        $self->{inode} = $st[1];
        $self->{end}   = $st[7];
    }
    return 1;
}
=head2 print_at( $offset, @data )
This takes an optional offset and some data to print.
C< $offset >, if defined, will be used to seek into the file. If file_offset is
set, it will be used as the zero location. If it is undefined, no seeking will
occur. Then, C< @data > will be printed to the current location.
There is no return value.
=cut
# Write @data to the file, optionally seeking to $loc (relative to
# file_offset) first. An undefined $loc means "write at the current
# position". Dies if the print fails. Returns 1.
sub print_at {
    my $self = shift;
    my $loc  = shift;

    # Neutralize any output separators the caller may have set.
    local ($,,$\);

    my $fh = $self->{fh};
    if ( defined $loc ) {
        seek( $fh, $loc + $self->{file_offset}, SEEK_SET );
    }

    if ( DEBUG ) {
        my $caller = join ':', (caller)[0,2];
        my $len = length( join '', @_ );
        warn "($caller) print_at( " . (defined $loc ? $loc : '<undef>') . ", $len )\n";
    }

    # Bug fix: $loc may legitimately be undef here (seek-less write), and
    # interpolating undef into the die message would raise a fatal
    # 'uninitialized' warning under "use warnings FATAL => 'all'",
    # masking the real I/O error. Stringify it defensively instead.
    print( $fh @_ )
        or die 'Internal Error (print_at('
             . ( defined $loc ? $loc : '<undef>' )
             . ")): $!\n";

    return 1;
}
=head2 read_at( $offset, $length )
This takes an optional offset and a length.
C< $offset >, if defined, will be used to seek into the file. If file_offset is
set, it will be used as the zero location. If it is undefined, no seeking will
occur. Then, C< $length > bytes will be read from the current location.
The data read will be returned.
=cut
# Read $size bytes from the file, optionally seeking to $loc (relative
# to file_offset) first. An undefined $loc means "read from the
# handle's current position". Returns the bytes read.
sub read_at {
    my $self = shift;
    my ($loc, $size) = @_;
    my $fh = $self->{fh};
    if ( defined $loc ) {
        seek( $fh, $loc + $self->{file_offset}, SEEK_SET );
    }
    if ( DEBUG ) {
        my $caller = join ':', (caller)[0,2];
        warn "($caller) read_at( " . (defined $loc ? $loc : '<undef>') . ", $size )\n";
    }
    my $buffer;
    # NOTE(review): the return value of read() is not checked, so a read
    # error or short read silently yields a short (or undef) $buffer --
    # confirm callers tolerate this before relying on it.
    read( $fh, $buffer, $size);
    return $buffer;
}
=head2 DESTROY
When the ::Storage::File object goes out of scope, it will be closed.
=cut
# Destructor: close the underlying filehandle. During global
# destruction we may be invoked with a false invocant, so guard first.
sub DESTROY {
    my ($self) = @_;
    $self->close if $self;
    return;
}
=head2 request_space( $size )
This takes a size and adds that much space to the DBM.
This returns the offset for the new location.
=cut
# Reserve $size bytes at the logical end of the DB and return the
# offset of the newly reserved region.
sub request_space {
    my ($self, $size) = @_;
    #XXX Do I need to reset $self->{end} here? I need a testcase
    my $offset = $self->{end};
    $self->{end} = $offset + $size;
    return $offset;
}
=head2 copy_stats( $target_filename )
This will take the stats for the current filehandle and apply them to
C< $target_filename >. The stats copied are:
=over 4
=item * Owner UID and GID
=item * Permissions
=back
=cut
# Copy owner, group, and permission bits from our open handle onto
# $temp_filename (e.g. so a rewritten copy can replace the original
# with matching metadata).
sub copy_stats {
    my $self = shift;
    my ($temp_filename) = @_;
    my @stats = stat( $self->{fh} );
    # Keep only the permission bits, masking off the file-type bits.
    my $perms = $stats[2] & 07777;
    my $uid = $stats[4];
    my $gid = $stats[5];
    # NOTE(review): chown/chmod return values are not checked; chown in
    # particular fails for non-root users -- confirm this best-effort
    # behavior is intended.
    chown( $uid, $gid, $temp_filename );
    chmod( $perms, $temp_filename );
}
# Force the filehandle's buffer out to disk. Briefly toggling $| on the
# handle flushes it without changing the caller's autoflush setting.
sub flush {
    my $self = shift;
    my $prev_handle = select $self->{fh};
    my $prev_af = $|;
    $| = 1;
    $| = $prev_af;
    select $prev_handle;
    return 1;
}
# Probe whether the filehandle accepts writes by printing the empty
# string (q.v. perlmonks node 691054). Returns false for a missing or
# closed handle.
sub is_writable {
    my $self = shift;
    my $fh = $self->{fh};
    return unless defined $fh;
    # A closed or bogus handle has no fileno.
    return unless defined fileno $fh;
    local $\ = ''; # just in case
    no warnings; # temporarily disable warnings
    local $^W; # temporarily disable warnings
    return print $fh '';
}
# Public locking API: both flavors funnel into _lock() with the
# matching flock() constant.
sub lock_exclusive {
    my ($self, $obj) = @_;
    return $self->_lock( $obj, LOCK_EX );
}

sub lock_shared {
    my ($self, $obj) = @_;
    return $self->_lock( $obj, LOCK_SH );
}
# Take (or nest) a flock of type $type on the underlying file. After
# acquiring the OS lock we re-stat the file and, if another process has
# replaced it out from under us (e.g. via optimize()), reopen and
# re-lock. Returns 1 on success, empty when locking is disabled or
# there is no filehandle.
sub _lock {
    my $self = shift;
    my ($obj, $type) = @_;
    $type = LOCK_EX unless defined $type;
    #XXX This is a temporary fix for Win32 and autovivification. It
    # needs to improve somehow. -RobK, 2008-03-09
    if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) {
        $type = LOCK_EX;
    }
    if (!defined($self->{fh})) { return; }
    #XXX This either needs to allow for upgrading a shared lock to an
    # exclusive lock or something else with autovivification.
    # -RobK, 2008-03-09
    if ($self->{locking}) {
        # Only the outermost lock touches the OS; nested calls just
        # increment the counter below.
        if (!$self->{locked}) {
            flock($self->{fh}, $type);
            # refresh end counter in case file has changed size
            my @stats = stat($self->{fh});
            $self->{end} = $stats[7];
            # double-check file inode, in case another process
            # has optimize()d our file while we were waiting.
            if (defined($self->{inode}) && $stats[1] != $self->{inode}) {
                $self->close;
                $self->open;
                #XXX This needs work
                $obj->{engine}->setup( $obj );
                flock($self->{fh}, $type); # re-lock
                # This may not be necessary after re-opening
                $self->{end} = (stat($self->{fh}))[7]; # re-end
            }
        }
        $self->{locked}++;
        return 1;
    }
    # Locking is disabled for this storage; no lock taken.
    return;
}
# Release one level of nesting; the OS lock is only dropped when the
# outermost lock is released. Returns 1 when the file was actually
# unlocked, empty otherwise.
sub unlock {
    my $self = shift;
    return if !defined $self->{fh};
    if ( $self->{locking} && $self->{locked} > 0 ) {
        $self->{locked}--;
        if ( !$self->{locked} ) {
            flock( $self->{fh}, LOCK_UN );
            return 1;
        }
        return;
    }
    return;
}
1;
__END__