Initial Commit
This commit is contained in:
844
database/perl/lib/Hash/Util.pm
Normal file
844
database/perl/lib/Hash/Util.pm
Normal file
@@ -0,0 +1,844 @@
|
||||
package Hash::Util;
|
||||
|
||||
require 5.007003;
|
||||
use strict;
|
||||
use Carp;
|
||||
use warnings;
|
||||
no warnings 'uninitialized';
|
||||
use warnings::register;
|
||||
use Scalar::Util qw(reftype);
|
||||
|
||||
require Exporter;
|
||||
our @ISA = qw(Exporter);
|
||||
our @EXPORT_OK = qw(
|
||||
fieldhash fieldhashes
|
||||
|
||||
all_keys
|
||||
lock_keys unlock_keys
|
||||
lock_value unlock_value
|
||||
lock_hash unlock_hash
|
||||
lock_keys_plus
|
||||
hash_locked hash_unlocked
|
||||
hashref_locked hashref_unlocked
|
||||
hidden_keys legal_keys
|
||||
|
||||
lock_ref_keys unlock_ref_keys
|
||||
lock_ref_value unlock_ref_value
|
||||
lock_hashref unlock_hashref
|
||||
lock_ref_keys_plus
|
||||
hidden_ref_keys legal_ref_keys
|
||||
|
||||
hash_seed hash_value hv_store
|
||||
bucket_stats bucket_stats_formatted bucket_info bucket_array
|
||||
lock_hash_recurse unlock_hash_recurse
|
||||
lock_hashref_recurse unlock_hashref_recurse
|
||||
|
||||
hash_traversal_mask
|
||||
|
||||
bucket_ratio
|
||||
used_buckets
|
||||
num_buckets
|
||||
);
|
||||
BEGIN {
|
||||
# make sure all our XS routines are available early so their prototypes
|
||||
# are correctly applied in the following code.
|
||||
our $VERSION = '0.23';
|
||||
require XSLoader;
|
||||
XSLoader::load();
|
||||
}
|
||||
|
||||
sub import {
|
||||
my $class = shift;
|
||||
if ( grep /fieldhash/, @_ ) {
|
||||
require Hash::Util::FieldHash;
|
||||
Hash::Util::FieldHash->import(':all'); # for re-export
|
||||
}
|
||||
unshift @_, $class;
|
||||
goto &Exporter::import;
|
||||
}
|
||||
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Hash::Util - A selection of general-utility hash subroutines
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
# Restricted hashes
|
||||
|
||||
use Hash::Util qw(
|
||||
fieldhash fieldhashes
|
||||
|
||||
all_keys
|
||||
lock_keys unlock_keys
|
||||
lock_value unlock_value
|
||||
lock_hash unlock_hash
|
||||
lock_keys_plus
|
||||
hash_locked hash_unlocked
|
||||
hashref_locked hashref_unlocked
|
||||
hidden_keys legal_keys
|
||||
|
||||
lock_ref_keys unlock_ref_keys
|
||||
lock_ref_value unlock_ref_value
|
||||
lock_hashref unlock_hashref
|
||||
lock_ref_keys_plus
|
||||
hidden_ref_keys legal_ref_keys
|
||||
|
||||
hash_seed hash_value hv_store
|
||||
bucket_stats bucket_info bucket_array
|
||||
lock_hash_recurse unlock_hash_recurse
|
||||
lock_hashref_recurse unlock_hashref_recurse
|
||||
|
||||
hash_traversal_mask
|
||||
);
|
||||
|
||||
%hash = (foo => 42, bar => 23);
|
||||
# Ways to restrict a hash
|
||||
lock_keys(%hash);
|
||||
lock_keys(%hash, @keyset);
|
||||
lock_keys_plus(%hash, @additional_keys);
|
||||
|
||||
# Ways to inspect the properties of a restricted hash
|
||||
my @legal = legal_keys(%hash);
|
||||
my @hidden = hidden_keys(%hash);
|
||||
my $ref = all_keys(%hash,@keys,@hidden);
|
||||
my $is_locked = hash_locked(%hash);
|
||||
|
||||
# Remove restrictions on the hash
|
||||
unlock_keys(%hash);
|
||||
|
||||
# Lock individual values in a hash
|
||||
lock_value (%hash, 'foo');
|
||||
unlock_value(%hash, 'foo');
|
||||
|
||||
# Ways to change the restrictions on both keys and values
|
||||
lock_hash (%hash);
|
||||
unlock_hash(%hash);
|
||||
|
||||
my $hashes_are_randomised = hash_seed() !~ /^\0+$/;
|
||||
|
||||
my $int_hash_value = hash_value( 'string' );
|
||||
|
||||
my $mask= hash_traversal_mask(%hash);
|
||||
|
||||
hash_traversal_mask(%hash,1234);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
|
||||
for manipulating hashes that don't really warrant a keyword.
|
||||
|
||||
C<Hash::Util> contains a set of functions that support
|
||||
L<restricted hashes|/"Restricted hashes">. These are described in
|
||||
this document. C<Hash::Util::FieldHash> contains an (unrelated)
|
||||
set of functions that support the use of hashes in
|
||||
I<inside-out classes>, described in L<Hash::Util::FieldHash>.
|
||||
|
||||
By default C<Hash::Util> does not export anything.
|
||||
|
||||
=head2 Restricted hashes
|
||||
|
||||
5.8.0 introduces the ability to restrict a hash to a certain set of
|
||||
keys. No keys outside of this set can be added. It also introduces
|
||||
the ability to lock an individual key so it cannot be deleted and the
|
||||
ability to ensure that an individual value cannot be changed.
|
||||
|
||||
This is intended to largely replace the deprecated pseudo-hashes.
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<lock_keys>
|
||||
|
||||
=item B<unlock_keys>
|
||||
|
||||
lock_keys(%hash);
|
||||
lock_keys(%hash, @keys);
|
||||
|
||||
Restricts the given %hash's set of keys to @keys. If @keys is not
|
||||
given it restricts it to its current keyset. No more keys can be
|
||||
added. delete() and exists() will still work, but will not alter
|
||||
the set of allowed keys. B<Note>: the current implementation prevents
|
||||
the hash from being bless()ed while it is in a locked state. Any attempt
|
||||
to do so will raise an exception. Of course you can still bless()
|
||||
the hash before you call lock_keys() so this shouldn't be a problem.
|
||||
|
||||
unlock_keys(%hash);
|
||||
|
||||
Removes the restriction on the %hash's keyset.
|
||||
|
||||
B<Note> that if any of the values of the hash have been locked they will not
|
||||
be unlocked after this sub executes.
|
||||
|
||||
Both routines return a reference to the hash operated on.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_ref_keys {
|
||||
my($hash, @keys) = @_;
|
||||
|
||||
_clear_placeholders(%$hash);
|
||||
if( @keys ) {
|
||||
my %keys = map { ($_ => 1) } @keys;
|
||||
my %original_keys = map { ($_ => 1) } keys %$hash;
|
||||
foreach my $k (keys %original_keys) {
|
||||
croak "Hash has key '$k' which is not in the new key set"
|
||||
unless $keys{$k};
|
||||
}
|
||||
|
||||
foreach my $k (@keys) {
|
||||
$hash->{$k} = undef unless exists $hash->{$k};
|
||||
}
|
||||
Internals::SvREADONLY %$hash, 1;
|
||||
|
||||
foreach my $k (@keys) {
|
||||
delete $hash->{$k} unless $original_keys{$k};
|
||||
}
|
||||
}
|
||||
else {
|
||||
Internals::SvREADONLY %$hash, 1;
|
||||
}
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub unlock_ref_keys {
|
||||
my $hash = shift;
|
||||
|
||||
Internals::SvREADONLY %$hash, 0;
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_keys (\%;@) { lock_ref_keys(@_) }
|
||||
sub unlock_keys (\%) { unlock_ref_keys(@_) }
|
||||
|
||||
#=item B<_clear_placeholders>
|
||||
#
|
||||
# This function removes any placeholder keys from a hash. See Perl_hv_clear_placeholders()
|
||||
# in hv.c for what it does exactly. It is currently exposed as XS by universal.c and
|
||||
# injected into the Hash::Util namespace.
|
||||
#
|
||||
# It is not intended for use outside of this module, and may be changed
|
||||
# or removed without notice or deprecation cycle.
|
||||
#
|
||||
#=cut
|
||||
#
|
||||
# sub _clear_placeholders {} # just in case someone searches...
|
||||
|
||||
=item B<lock_keys_plus>
|
||||
|
||||
lock_keys_plus(%hash,@additional_keys)
|
||||
|
||||
Similar to C<lock_keys()>, with the difference being that the optional key list
|
||||
specifies keys that may or may not be already in the hash. Essentially this is
|
||||
an easier way to say
|
||||
|
||||
lock_keys(%hash,@additional_keys,keys %hash);
|
||||
|
||||
Returns a reference to %hash
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub lock_ref_keys_plus {
|
||||
my ($hash,@keys) = @_;
|
||||
my @delete;
|
||||
_clear_placeholders(%$hash);
|
||||
foreach my $key (@keys) {
|
||||
unless (exists($hash->{$key})) {
|
||||
$hash->{$key}=undef;
|
||||
push @delete,$key;
|
||||
}
|
||||
}
|
||||
Internals::SvREADONLY(%$hash,1);
|
||||
delete @{$hash}{@delete};
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
|
||||
|
||||
|
||||
=item B<lock_value>
|
||||
|
||||
=item B<unlock_value>
|
||||
|
||||
lock_value (%hash, $key);
|
||||
unlock_value(%hash, $key);
|
||||
|
||||
Locks and unlocks the value for an individual key of a hash. The value of a
|
||||
locked key cannot be changed.
|
||||
|
||||
Unless %hash has already been locked the key/value could be deleted
|
||||
regardless of this setting.
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_ref_value {
|
||||
my($hash, $key) = @_;
|
||||
# I'm doubtful about this warning, as it seems not to be true.
|
||||
# Marking a value in the hash as RO is useful, regardless
|
||||
# of the status of the hash itself.
|
||||
carp "Cannot usefully lock values in an unlocked hash"
|
||||
if !Internals::SvREADONLY(%$hash) && warnings::enabled;
|
||||
Internals::SvREADONLY $hash->{$key}, 1;
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub unlock_ref_value {
|
||||
my($hash, $key) = @_;
|
||||
Internals::SvREADONLY $hash->{$key}, 0;
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub lock_value (\%$) { lock_ref_value(@_) }
|
||||
sub unlock_value (\%$) { unlock_ref_value(@_) }
|
||||
|
||||
|
||||
=item B<lock_hash>
|
||||
|
||||
=item B<unlock_hash>
|
||||
|
||||
lock_hash(%hash);
|
||||
|
||||
lock_hash() locks an entire hash, making all keys and values read-only.
|
||||
No value can be changed, no keys can be added or deleted.
|
||||
|
||||
unlock_hash(%hash);
|
||||
|
||||
unlock_hash() does the opposite of lock_hash(). All keys and values
|
||||
are made writable. All values can be changed and keys can be added
|
||||
and deleted.
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_hashref {
|
||||
my $hash = shift;
|
||||
|
||||
lock_ref_keys($hash);
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
Internals::SvREADONLY($value,1);
|
||||
}
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub unlock_hashref {
|
||||
my $hash = shift;
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
Internals::SvREADONLY($value, 0);
|
||||
}
|
||||
|
||||
unlock_ref_keys($hash);
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_hash (\%) { lock_hashref(@_) }
|
||||
sub unlock_hash (\%) { unlock_hashref(@_) }
|
||||
|
||||
=item B<lock_hash_recurse>
|
||||
|
||||
=item B<unlock_hash_recurse>
|
||||
|
||||
lock_hash_recurse(%hash);
|
||||
|
||||
lock_hash() locks an entire hash and any hashes it references recursively,
|
||||
making all keys and values read-only. No value can be changed, no keys can
|
||||
be added or deleted.
|
||||
|
||||
This method B<only> recurses into hashes that are referenced by another hash.
|
||||
Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
|
||||
Hashes (HoAoH) will only have the top hash restricted.
|
||||
|
||||
unlock_hash_recurse(%hash);
|
||||
|
||||
unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and
|
||||
values are made writable. All values can be changed and keys can be added
|
||||
and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_hashref_recurse {
|
||||
my $hash = shift;
|
||||
|
||||
lock_ref_keys($hash);
|
||||
foreach my $value (values %$hash) {
|
||||
my $type = reftype($value);
|
||||
if (defined($type) and $type eq 'HASH') {
|
||||
lock_hashref_recurse($value);
|
||||
}
|
||||
Internals::SvREADONLY($value,1);
|
||||
}
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub unlock_hashref_recurse {
|
||||
my $hash = shift;
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
my $type = reftype($value);
|
||||
if (defined($type) and $type eq 'HASH') {
|
||||
unlock_hashref_recurse($value);
|
||||
}
|
||||
Internals::SvREADONLY($value,0);
|
||||
}
|
||||
unlock_ref_keys($hash);
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) }
|
||||
sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
|
||||
|
||||
=item B<hashref_locked>
|
||||
|
||||
=item B<hash_locked>
|
||||
|
||||
hashref_locked(\%hash) and print "Hash is locked!\n";
|
||||
hash_locked(%hash) and print "Hash is locked!\n";
|
||||
|
||||
Returns true if the hash and its keys are locked.
|
||||
|
||||
=cut
|
||||
|
||||
sub hashref_locked {
|
||||
my $hash=shift;
|
||||
Internals::SvREADONLY(%$hash);
|
||||
}
|
||||
|
||||
sub hash_locked(\%) { hashref_locked(@_) }
|
||||
|
||||
=item B<hashref_unlocked>
|
||||
|
||||
=item B<hash_unlocked>
|
||||
|
||||
hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
|
||||
hash_unlocked(%hash) and print "Hash is unlocked!\n";
|
||||
|
||||
Returns true if the hash and its keys are unlocked.
|
||||
|
||||
=cut
|
||||
|
||||
sub hashref_unlocked {
|
||||
my $hash=shift;
|
||||
!Internals::SvREADONLY(%$hash);
|
||||
}
|
||||
|
||||
sub hash_unlocked(\%) { hashref_unlocked(@_) }
|
||||
|
||||
=for demerphqs_editor
|
||||
sub legal_ref_keys{}
|
||||
sub hidden_ref_keys{}
|
||||
sub all_keys{}
|
||||
|
||||
=cut
|
||||
|
||||
sub legal_keys(\%) { legal_ref_keys(@_) }
|
||||
sub hidden_keys(\%){ hidden_ref_keys(@_) }
|
||||
|
||||
=item B<legal_keys>
|
||||
|
||||
my @keys = legal_keys(%hash);
|
||||
|
||||
Returns the list of the keys that are legal in a restricted hash.
|
||||
In the case of an unrestricted hash this is identical to calling
|
||||
keys(%hash).
|
||||
|
||||
=item B<hidden_keys>
|
||||
|
||||
my @keys = hidden_keys(%hash);
|
||||
|
||||
Returns the list of the keys that are legal in a restricted hash but
|
||||
do not have a value associated to them. Thus if 'foo' is a
|
||||
"hidden" key of the %hash it will return false for both C<defined>
|
||||
and C<exists> tests.
|
||||
|
||||
In the case of an unrestricted hash this will return an empty list.
|
||||
|
||||
B<NOTE> this is an experimental feature that is heavily dependent
|
||||
on the current implementation of restricted hashes. Should the
|
||||
implementation change, this routine may become meaningless, in which
|
||||
case it will return an empty list.
|
||||
|
||||
=item B<all_keys>
|
||||
|
||||
all_keys(%hash,@keys,@hidden);
|
||||
|
||||
Populates the arrays @keys with the all the keys that would pass
|
||||
an C<exists> tests, and populates @hidden with the remaining legal
|
||||
keys that have not been utilized.
|
||||
|
||||
Returns a reference to the hash.
|
||||
|
||||
In the case of an unrestricted hash this will be equivalent to
|
||||
|
||||
$ref = do {
|
||||
@keys = keys %hash;
|
||||
@hidden = ();
|
||||
\%hash
|
||||
};
|
||||
|
||||
B<NOTE> this is an experimental feature that is heavily dependent
|
||||
on the current implementation of restricted hashes. Should the
|
||||
implementation change this routine may become meaningless in which
|
||||
case it will behave identically to how it would behave on an
|
||||
unrestricted hash.
|
||||
|
||||
=item B<hash_seed>
|
||||
|
||||
my $hash_seed = hash_seed();
|
||||
|
||||
hash_seed() returns the seed bytes used to randomise hash ordering.
|
||||
|
||||
B<Note that the hash seed is sensitive information>: by knowing it one
|
||||
can craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the hash seed> to people who don't need to know it.
|
||||
See also L<perlrun/PERL_HASH_SEED_DEBUG>.
|
||||
|
||||
Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
|
||||
which may be of nearly any size as determined by the hash function your
|
||||
Perl has been built with. Possible sizes may be but are not limited to
|
||||
4 bytes (for most hash algorithms) and 16 bytes (for siphash).
|
||||
|
||||
=item B<hash_value>
|
||||
|
||||
my $hash_value = hash_value($string);
|
||||
|
||||
hash_value() returns the current perl's internal hash value for a given
|
||||
string.
|
||||
|
||||
Returns a 32 bit integer representing the hash value of the string passed
|
||||
in. This value is only reliable for the lifetime of the process. It may
|
||||
be different depending on invocation, environment variables, perl version,
|
||||
architectures, and build options.
|
||||
|
||||
B<Note that the hash value of a given string is sensitive information>:
|
||||
by knowing it one can deduce the hash seed which in turn can allow one to
|
||||
craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the hash value of a string> to people who don't need to
|
||||
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
|
||||
|
||||
=item B<bucket_info>
|
||||
|
||||
Return a set of basic information about a hash.
|
||||
|
||||
my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
|
||||
|
||||
Fields are as follows:
|
||||
|
||||
0: Number of keys in the hash
|
||||
1: Number of buckets in the hash
|
||||
2: Number of used buckets in the hash
|
||||
rest : list of counts, Kth element is the number of buckets
|
||||
with K keys in it.
|
||||
|
||||
See also bucket_stats() and bucket_array().
|
||||
|
||||
=item B<bucket_stats>
|
||||
|
||||
Returns a list of statistics about a hash.
|
||||
|
||||
my ($keys, $buckets, $used, $quality, $utilization_ratio,
|
||||
$collision_pct, $mean, $stddev, @length_counts)
|
||||
= bucket_stats($hashref);
|
||||
|
||||
Fields are as follows:
|
||||
|
||||
0: Number of keys in the hash
|
||||
1: Number of buckets in the hash
|
||||
2: Number of used buckets in the hash
|
||||
3: Hash Quality Score
|
||||
4: Percent of buckets used
|
||||
5: Percent of keys which are in collision
|
||||
6: Mean bucket length of occupied buckets
|
||||
7: Standard Deviation of bucket lengths of occupied buckets
|
||||
rest : list of counts, Kth element is the number of buckets
|
||||
with K keys in it.
|
||||
|
||||
See also bucket_info() and bucket_array().
|
||||
|
||||
Note that Hash Quality Score would be 1 for an ideal hash, numbers
|
||||
close to and below 1 indicate good hashing, and number significantly
|
||||
above indicate a poor score. In practice it should be around 0.95 to 1.05.
|
||||
It is defined as:
|
||||
|
||||
$score= sum( $count[$length] * ($length * ($length + 1) / 2) )
|
||||
/
|
||||
( ( $keys / 2 * $buckets ) *
|
||||
( $keys + ( 2 * $buckets ) - 1 ) )
|
||||
|
||||
The formula is from the Red Dragon book (reformulated to use the data available)
|
||||
and is documented at L<http://www.strchr.com/hash_functions>
|
||||
|
||||
=item B<bucket_array>
|
||||
|
||||
my $array= bucket_array(\%hash);
|
||||
|
||||
Returns a packed representation of the bucket array associated with a hash. Each element
|
||||
of the array is either an integer K, in which case it represents K empty buckets, or
|
||||
a reference to another array which contains the keys that are in that bucket.
|
||||
|
||||
B<Note that the information returned by bucket_array is sensitive information>:
|
||||
by knowing it one can directly attack perl's hash function which in turn may allow
|
||||
one to craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the output of this function> to people who don't need to
|
||||
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
|
||||
for debugging and diagnostics purposes only, it is hard to imagine a reason why it
|
||||
would be used in production code.
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub bucket_stats {
|
||||
my ($hash) = @_;
|
||||
my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
|
||||
my $sum;
|
||||
my $score;
|
||||
for (1 .. $#length_counts) {
|
||||
$sum += ($length_counts[$_] * $_);
|
||||
$score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
|
||||
}
|
||||
$score = $score /
|
||||
(( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
|
||||
if $keys;
|
||||
my ($mean, $stddev)= (0, 0);
|
||||
if ($used) {
|
||||
$mean= $sum / $used;
|
||||
$sum= 0;
|
||||
$sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts;
|
||||
|
||||
$stddev= sqrt($sum/$used);
|
||||
}
|
||||
return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
|
||||
}
|
||||
|
||||
=item B<bucket_stats_formatted>
|
||||
|
||||
print bucket_stats_formatted($hashref);
|
||||
|
||||
Return a formatted report of the information returned by bucket_stats().
|
||||
An example report looks like this:
|
||||
|
||||
Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good)
|
||||
Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00%
|
||||
Chain Length - mean: 1.52 stddev: 0.66
|
||||
Buckets 64 [0000000000000000000000000000000111111111111111111122222222222333]
|
||||
Len 0 Pct: 48.44 [###############################]
|
||||
Len 1 Pct: 29.69 [###################]
|
||||
Len 2 Pct: 17.19 [###########]
|
||||
Len 3 Pct: 4.69 [###]
|
||||
Keys 50 [11111111111111111111111111111111122222222222222333]
|
||||
Pos 1 Pct: 66.00 [#################################]
|
||||
Pos 2 Pct: 28.00 [##############]
|
||||
Pos 3 Pct: 6.00 [###]
|
||||
|
||||
The first set of stats gives some summary statistical information,
|
||||
including the quality score translated into "Good", "Poor" and "Bad",
|
||||
(score<=1.05, score<=1.2, score>1.2). See the documentation in
|
||||
bucket_stats() for more details.
|
||||
|
||||
The two sets of barcharts give stats and a visual indication of performance
|
||||
of the hash.
|
||||
|
||||
The first gives data on bucket chain lengths and provides insight on how
|
||||
much work a fetch *miss* will take. In this case we have to inspect every item
|
||||
in a bucket before we can be sure the item is not in the list. The performance
|
||||
for an insert is equivalent to this case, as is a delete where the item
|
||||
is not in the hash.
|
||||
|
||||
The second gives data on how many keys are at each depth in the chain, and
|
||||
gives an idea of how much work a fetch *hit* will take. The performance for
|
||||
an update or delete of an item in the hash is equivalent to this case.
|
||||
|
||||
Note that these statistics are summary only. Actual performance will depend
|
||||
on real hit/miss ratios accessing the hash. If you are concerned by hit ratios
|
||||
you are recommended to "oversize" your hash by using something like:
|
||||
|
||||
keys(%hash)= keys(%hash) << $k;
|
||||
|
||||
With $k chosen carefully, and likely to be a small number like 1 or 2. In
|
||||
theory the larger the bucket array the less chance of collision.
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub _bucket_stats_formatted_bars {
|
||||
my ($total, $ary, $start_idx, $title, $row_title)= @_;
|
||||
|
||||
my $return = "";
|
||||
my $max_width= $total > 64 ? 64 : $total;
|
||||
my $bar_width= $max_width / $total;
|
||||
|
||||
my $str= "";
|
||||
if ( @$ary < 10) {
|
||||
for my $idx ($start_idx .. $#$ary) {
|
||||
$str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width));
|
||||
}
|
||||
} else {
|
||||
$str= "-" x $max_width;
|
||||
}
|
||||
$return .= sprintf "%-7s %6d [%s]\n",$title, $total, $str;
|
||||
|
||||
foreach my $idx ($start_idx .. $#$ary) {
|
||||
$return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n",
|
||||
$row_title,
|
||||
$idx,
|
||||
$ary->[$idx] / $total * 100,
|
||||
$ary->[$idx],
|
||||
"#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)),
|
||||
;
|
||||
}
|
||||
return $return;
|
||||
}
|
||||
|
||||
sub bucket_stats_formatted {
|
||||
my ($hashref)= @_;
|
||||
my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct,
|
||||
$mean, $stddev, @length_counts) = bucket_stats($hashref);
|
||||
|
||||
my $return= sprintf "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n"
|
||||
. "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n"
|
||||
. "Chain Length - mean: %.2f stddev: %.2f\n",
|
||||
$keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad",
|
||||
$utilization_ratio * 100,
|
||||
$keys/$buckets * 100,
|
||||
$collision_pct * 100,
|
||||
$mean, $stddev;
|
||||
|
||||
my @key_depth;
|
||||
$key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 )
|
||||
for reverse 1 .. $#length_counts;
|
||||
|
||||
if ($keys) {
|
||||
$return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len");
|
||||
$return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos");
|
||||
}
|
||||
return $return
|
||||
}
|
||||
|
||||
=item B<hv_store>
|
||||
|
||||
my $sv = 0;
|
||||
hv_store(%hash,$key,$sv) or die "Failed to alias!";
|
||||
$hash{$key} = 1;
|
||||
print $sv; # prints 1
|
||||
|
||||
Stores an alias to a variable in a hash instead of copying the value.
|
||||
|
||||
=item B<hash_traversal_mask>
|
||||
|
||||
As of Perl 5.18 every hash has its own hash traversal order, and this order
|
||||
changes every time a new element is inserted into the hash. This functionality
|
||||
is provided by maintaining an unsigned integer mask (U32) which is xor'ed
|
||||
with the actual bucket id during a traversal of the hash buckets using keys(),
|
||||
values() or each().
|
||||
|
||||
You can use this subroutine to get and set the traversal mask for a specific
|
||||
hash. Setting the mask ensures that a given hash will produce the same key
|
||||
order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
|
||||
the same key order for the same hash seed and traversal mask, items that
|
||||
collide into one bucket may have different orders regardless of this setting.
|
||||
|
||||
=item B<bucket_ratio>
|
||||
|
||||
This function behaves the same way that scalar(%hash) behaved prior to
|
||||
Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
|
||||
hash method, if untied then if the hash is empty it return 0, otherwise it
|
||||
returns a string containing the number of used buckets in the hash,
|
||||
followed by a slash, followed by the total number of buckets in the hash.
|
||||
|
||||
my %hash=("foo"=>1);
|
||||
print Hash::Util::bucket_ratio(%hash); # prints "1/8"
|
||||
|
||||
=item B<used_buckets>
|
||||
|
||||
This function returns the count of used buckets in the hash. It is expensive
|
||||
to calculate and the value is NOT cached, so avoid use of this function
|
||||
in production code.
|
||||
|
||||
=item B<num_buckets>
|
||||
|
||||
This function returns the total number of buckets the hash holds, or would
|
||||
hold if the array were created. (When a hash is freshly created the array
|
||||
may not be allocated even though this value will be non-zero.)
|
||||
|
||||
=back
|
||||
|
||||
=head2 Operating on references to hashes.
|
||||
|
||||
Most subroutines documented in this module have equivalent versions
|
||||
that operate on references to hashes instead of native hashes.
|
||||
The following is a list of these subs. They are identical except
|
||||
in name and in that instead of taking a %hash they take a $hashref,
|
||||
and additionally are not prototyped.
|
||||
|
||||
=over 4
|
||||
|
||||
=item lock_ref_keys
|
||||
|
||||
=item unlock_ref_keys
|
||||
|
||||
=item lock_ref_keys_plus
|
||||
|
||||
=item lock_ref_value
|
||||
|
||||
=item unlock_ref_value
|
||||
|
||||
=item lock_hashref
|
||||
|
||||
=item unlock_hashref
|
||||
|
||||
=item lock_hashref_recurse
|
||||
|
||||
=item unlock_hashref_recurse
|
||||
|
||||
=item hash_ref_unlocked
|
||||
|
||||
=item legal_ref_keys
|
||||
|
||||
=item hidden_ref_keys
|
||||
|
||||
=back
|
||||
|
||||
=head1 CAVEATS
|
||||
|
||||
Note that the trapping of the restricted operations is not atomic:
|
||||
for example
|
||||
|
||||
eval { %hash = (illegal_key => 1) }
|
||||
|
||||
leaves the C<%hash> empty rather than with its original contents.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
The interface exposed by this module is very close to the current
|
||||
implementation of restricted hashes. Over time it is expected that
|
||||
this behavior will be extended and the interface abstracted further.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael G Schwern <schwern@pobox.com> on top of code by Nick
|
||||
Ing-Simmons and Jeffrey Friedl.
|
||||
|
||||
hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
|
||||
|
||||
Additional code by Yves Orton.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
|
||||
|
||||
L<Hash::Util::FieldHash>.
|
||||
|
||||
=cut
|
||||
|
||||
1;
|
||||
861
database/perl/lib/Hash/Util/FieldHash.pm
Normal file
861
database/perl/lib/Hash/Util/FieldHash.pm
Normal file
@@ -0,0 +1,861 @@
|
||||
package Hash::Util::FieldHash;
|
||||
|
||||
use 5.009004;
|
||||
use strict;
|
||||
use warnings;
|
||||
use Scalar::Util qw( reftype);
|
||||
|
||||
our $VERSION = '1.20';
|
||||
|
||||
require Exporter;
|
||||
our @ISA = qw(Exporter);
|
||||
our %EXPORT_TAGS = (
|
||||
'all' => [ qw(
|
||||
fieldhash
|
||||
fieldhashes
|
||||
idhash
|
||||
idhashes
|
||||
id
|
||||
id_2obj
|
||||
register
|
||||
)],
|
||||
);
|
||||
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
|
||||
|
||||
{
|
||||
require XSLoader;
|
||||
my %ob_reg; # private object registry
|
||||
sub _ob_reg { \ %ob_reg }
|
||||
XSLoader::load();
|
||||
}
|
||||
|
||||
sub fieldhash (\%) {
|
||||
for ( shift ) {
|
||||
return unless ref() && reftype( $_) eq 'HASH';
|
||||
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
|
||||
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sub idhash (\%) {
|
||||
for ( shift ) {
|
||||
return unless ref() && reftype( $_) eq 'HASH';
|
||||
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
|
||||
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sub fieldhashes { map &fieldhash( $_), @_ }
|
||||
sub idhashes { map &idhash( $_), @_ }
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Hash::Util::FieldHash - Support for Inside-Out Classes
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
### Create fieldhashes
|
||||
use Hash::Util qw(fieldhash fieldhashes);
|
||||
|
||||
# Create a single field hash
|
||||
fieldhash my %foo;
|
||||
|
||||
# Create three at once...
|
||||
fieldhashes \ my(%foo, %bar, %baz);
|
||||
# ...or any number
|
||||
fieldhashes @hashrefs;
|
||||
|
||||
### Create an idhash and register it for garbage collection
|
||||
use Hash::Util::FieldHash qw(idhash register);
|
||||
idhash my %name;
|
||||
my $object = \ do { my $o };
|
||||
# register the idhash for garbage collection with $object
|
||||
register($object, \ %name);
|
||||
# the following entry will be deleted when $object goes out of scope
|
||||
$name{$object} = 'John Doe';
|
||||
|
||||
### Register an ordinary hash for garbage collection
|
||||
use Hash::Util::FieldHash qw(id register);
|
||||
my %name;
|
||||
my $object = \ do { my $o };
|
||||
# register the hash %name for garbage collection of $object's id
|
||||
register $object, \ %name;
|
||||
# the following entry will be deleted when $object goes out of scope
|
||||
$name{id $object} = 'John Doe';
|
||||
|
||||
=head1 FUNCTIONS
|
||||
|
||||
C<Hash::Util::FieldHash> offers a number of functions in support of
|
||||
L<The Inside-out Technique> of class construction.
|
||||
|
||||
=over
|
||||
|
||||
=item id
|
||||
|
||||
id($obj)
|
||||
|
||||
Returns the reference address of a reference $obj. If $obj is
|
||||
not a reference, returns $obj.
|
||||
|
||||
This function is a stand-in replacement for
|
||||
L<Scalar::Util::refaddr|Scalar::Util/refaddr>,
|
||||
that is, it returns
|
||||
the reference address of its argument as a numeric value. The only
|
||||
difference is that C<refaddr()> returns C<undef> when given a
|
||||
non-reference while C<id()> returns its argument unchanged.
|
||||
|
||||
C<id()> also uses a caching technique that makes it faster when
|
||||
the id of an object is requested often, but slower if it is needed
|
||||
only once or twice.
|
||||
|
||||
=item id_2obj
|
||||
|
||||
$obj = id_2obj($id)
|
||||
|
||||
If C<$id> is the id of a registered object (see L</register>), returns
|
||||
the object, otherwise an undefined value. For registered objects this
|
||||
is the inverse function of C<id()>.
|
||||
|
||||
=item register
|
||||
|
||||
register($obj)
|
||||
register($obj, @hashrefs)
|
||||
|
||||
In the first form, registers an object to work with for the function
|
||||
C<id_2obj()>. In the second form, it additionally marks the given
|
||||
hashrefs down for garbage collection. This means that when the object
|
||||
goes out of scope, any entries in the given hashes under the key of
|
||||
C<id($obj)> will be deleted from the hashes.
|
||||
|
||||
It is a fatal error to register a non-reference $obj. Any non-hashrefs
|
||||
among the following arguments are silently ignored.
|
||||
|
||||
It is I<not> an error to register the same object multiple times with
|
||||
varying sets of hashrefs. Any hashrefs that are not registered yet
|
||||
will be added, others ignored.
|
||||
|
||||
Registry also implies thread support. When a new thread is created,
|
||||
all references are replaced with new ones, including all objects.
|
||||
If a hash uses the reference address of an object as a key, that
|
||||
connection would be broken. With a registered object, its id will
|
||||
be updated in all hashes registered with it.
|
||||
|
||||
=item idhash
|
||||
|
||||
idhash my %hash
|
||||
|
||||
Makes an idhash from the argument, which must be a hash.
|
||||
|
||||
An I<idhash> works like a normal hash, except that it stringifies a
|
||||
I<reference used as a key> differently. A reference is stringified
|
||||
as if the C<id()> function had been invoked on it, that is, its
|
||||
reference address in decimal is used as the key.
|
||||
|
||||
=item idhashes
|
||||
|
||||
idhashes \ my(%hash, %gnash, %trash)
|
||||
idhashes \ @hashrefs
|
||||
|
||||
Creates many idhashes from its hashref arguments. Returns those
|
||||
arguments that could be converted or their number in scalar context.
|
||||
|
||||
=item fieldhash
|
||||
|
||||
fieldhash %hash;
|
||||
|
||||
Creates a single fieldhash. The argument must be a hash. Returns
|
||||
a reference to the given hash if successful, otherwise nothing.
|
||||
|
||||
A I<fieldhash> is, in short, an idhash with auto-registry. When an
|
||||
object (or, indeed, any reference) is used as a fieldhash key, the
|
||||
fieldhash is automatically registered for garbage collection with
|
||||
the object, as if C<register $obj, \ %fieldhash> had been called.
|
||||
|
||||
=item fieldhashes
|
||||
|
||||
fieldhashes @hashrefs;
|
||||
|
||||
Creates any number of field hashes. Arguments must be hash references.
|
||||
Returns the converted hashrefs in list context, their number in scalar
|
||||
context.
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
A word on terminology: I shall use the term I<field> for a scalar
|
||||
piece of data that a class associates with an object. Other terms that
|
||||
have been used for this concept are "object variable", "(object) property",
|
||||
"(object) attribute" and more. Especially "attribute" has some currency
|
||||
among Perl programmer, but that clashes with the C<attributes> pragma. The
|
||||
term "field" also has some currency in this sense and doesn't seem
|
||||
to conflict with other Perl terminology.
|
||||
|
||||
In Perl, an object is a blessed reference. The standard way of associating
|
||||
data with an object is to store the data inside the object's body, that is,
|
||||
the piece of data pointed to by the reference.
|
||||
|
||||
In consequence, if two or more classes want to access an object they
|
||||
I<must> agree on the type of reference and also on the organization of
|
||||
data within the object body. Failure to agree on the type results in
|
||||
immediate death when the wrong method tries to access an object. Failure
|
||||
to agree on data organization may lead to one class trampling over the
|
||||
data of another.
|
||||
|
||||
This object model leads to a tight coupling between subclasses.
|
||||
If one class wants to inherit from another (and both classes access
|
||||
object data), the classes must agree about implementation details.
|
||||
Inheritance can only be used among classes that are maintained together,
|
||||
in a single source or not.
|
||||
|
||||
In particular, it is not possible to write general-purpose classes
|
||||
in this technique, classes that can advertise themselves as "Put me
|
||||
on your @ISA list and use my methods". If the other class has different
|
||||
ideas about how the object body is used, there is trouble.
|
||||
|
||||
For reference C<Name_hash> in L</Example 1> shows the standard implementation of
|
||||
a simple class C<Name> in the well-known hash based way. It also demonstrates
|
||||
the predictable failure to construct a common subclass C<NamedFile>
|
||||
of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs).
|
||||
|
||||
Thus, techniques are of interest that store object data I<not> in
|
||||
the object body but some other place.
|
||||
|
||||
=head2 The Inside-out Technique
|
||||
|
||||
With I<inside-out> classes, each class declares a (typically lexical)
|
||||
hash for each field it wants to use. The reference address of an
|
||||
object is used as the hash key. By definition, the reference address
|
||||
is unique to each object so this guarantees a place for each field that
|
||||
is private to the class and unique to each object. See C<Name_id>
|
||||
in L</Example 1> for a simple example.
|
||||
|
||||
In comparison to the standard implementation where the object is a
|
||||
hash and the fields correspond to hash keys, here the fields correspond
|
||||
to hashes, and the object determines the hash key. Thus the hashes
|
||||
appear to be turned I<inside out>.
|
||||
|
||||
The body of an object is never examined by an inside-out class, only
|
||||
its reference address is used. This allows for the body of an actual
|
||||
object to be I<anything at all> while the object methods of the class
|
||||
still work as designed. This is a key feature of inside-out classes.
|
||||
|
||||
=head2 Problems of Inside-out
|
||||
|
||||
Inside-out classes give us freedom of inheritance, but as usual there
|
||||
is a price.
|
||||
|
||||
Most obviously, there is the necessity of retrieving the reference
|
||||
address of an object for each data access. It's a minor inconvenience,
|
||||
but it does clutter the code.
|
||||
|
||||
More important (and less obvious) is the necessity of garbage
|
||||
collection. When a normal object dies, anything stored in the
|
||||
object body is garbage-collected by perl. With inside-out objects,
|
||||
Perl knows nothing about the data stored in field hashes by a class,
|
||||
but these must be deleted when the object goes out of scope. Thus
|
||||
the class must provide a C<DESTROY> method to take care of that.
|
||||
|
||||
In the presence of multiple classes it can be non-trivial
|
||||
to make sure that every relevant destructor is called for
|
||||
every object. Perl calls the first one it finds on the
|
||||
inheritance tree (if any) and that's it.
|
||||
|
||||
A related issue is thread-safety. When a new thread is created,
|
||||
the Perl interpreter is cloned, which implies that all reference
|
||||
addresses in use will be replaced with new ones. Thus, if a class
|
||||
tries to access a field of a cloned object its (cloned) data will
|
||||
still be stored under the now invalid reference address of the
|
||||
original in the parent thread. A general C<CLONE> method must
|
||||
be provided to re-establish the association.
|
||||
|
||||
=head2 Solutions
|
||||
|
||||
C<Hash::Util::FieldHash> addresses these issues on several
|
||||
levels.
|
||||
|
||||
The C<id()> function is provided in addition to the
|
||||
existing C<Scalar::Util::refaddr()>. Besides its short name
|
||||
it can be a little faster under some circumstances (and a
|
||||
bit slower under others). Benchmark if it matters. The
|
||||
working of C<id()> also allows the use of the class name
|
||||
as a I<generic object> as described L<further down|/"The Generic Object">.
|
||||
|
||||
The C<id()> function is incorporated in I<id hashes> in the sense
|
||||
that it is called automatically on every key that is used with
|
||||
the hash. No explicit call is necessary.
|
||||
|
||||
The problems of garbage collection and thread safety are both
|
||||
addressed by the function C<register()>. It registers an object
|
||||
together with any number of hashes. Registry means that when the
|
||||
object dies, an entry in any of the hashes under the reference
|
||||
address of this object will be deleted. This guarantees garbage
|
||||
collection in these hashes. It also means that on thread
|
||||
cloning the object's entries in registered hashes will be
|
||||
replaced with updated entries whose key is the cloned object's
|
||||
reference address. Thus the object-data association becomes
|
||||
thread-safe.
|
||||
|
||||
Object registry is best done when the object is initialized
|
||||
for use with a class. That way, garbage collection and thread
|
||||
safety are established for every object and every field that is
|
||||
initialized.
|
||||
|
||||
Finally, I<field hashes> incorporate all these functions in one
|
||||
package. Besides automatically calling the C<id()> function
|
||||
on every object used as a key, the object is registered with
|
||||
the field hash on first use. Classes based on field hashes
|
||||
are fully garbage-collected and thread safe without further
|
||||
measures.
|
||||
|
||||
=head2 More Problems
|
||||
|
||||
Another problem that occurs with inside-out classes is serialization.
|
||||
Since the object data is not in its usual place, standard routines
|
||||
like C<Storable::freeze()>, C<Storable::thaw()> and
|
||||
C<Data::Dumper::Dumper()> can't deal with it on their own. Both
|
||||
C<Data::Dumper> and C<Storable> provide the necessary hooks to
|
||||
make things work, but the functions or methods used by the hooks
|
||||
must be provided by each inside-out class.
|
||||
|
||||
A general solution to the serialization problem would require another
|
||||
level of registry, one that associates I<classes> and fields.
|
||||
So far, the functions of C<Hash::Util::FieldHash> are unaware of
|
||||
any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash>
|
||||
doesn't address the serialization problems.
|
||||
|
||||
=head2 The Generic Object
|
||||
|
||||
Classes based on the C<id()> function (and hence classes based on
|
||||
C<idhash()> and C<fieldhash()>) show a peculiar behavior in that
|
||||
the class name can be used like an object. Specifically, methods
|
||||
that set or read data associated with an object continue to work as
|
||||
class methods, just as if the class name were an object, distinct from
|
||||
all other objects, with its own data. This object may be called
|
||||
the I<generic object> of the class.
|
||||
|
||||
This works because field hashes respond to keys that are not references
|
||||
like a normal hash would and use the string offered as the hash key.
|
||||
Thus, if a method is called as a class method, the field hash is presented
|
||||
with the class name instead of an object and blithely uses it as a key.
|
||||
Since the keys of real objects are decimal numbers, there is no
|
||||
conflict and the slot in the field hash can be used like any other.
|
||||
The C<id()> function behaves correspondingly with respect to non-reference
|
||||
arguments.
|
||||
|
||||
Two possible uses (besides ignoring the property) come to mind.
|
||||
A singleton class could be implemented this using the generic object.
|
||||
If necessary, an C<init()> method could die or ignore calls with
|
||||
actual objects (references), so only the generic object will ever exist.
|
||||
|
||||
Another use of the generic object would be as a template. It is
|
||||
a convenient place to store class-specific defaults for various
|
||||
fields to be used in actual object initialization.
|
||||
|
||||
Usually, the feature can be entirely ignored. Calling I<object
|
||||
methods> as I<class methods> normally leads to an error and isn't used
|
||||
routinely anywhere. It may be a problem that this error isn't
|
||||
indicated by a class with a generic object.
|
||||
|
||||
=head2 How to use Field Hashes
|
||||
|
||||
Traditionally, the definition of an inside-out class contains a bare
|
||||
block inside which a number of lexical hashes are declared and the
|
||||
basic accessor methods defined, usually through C<Scalar::Util::refaddr>.
|
||||
Further methods may be defined outside this block. There has to be
|
||||
a DESTROY method and, for thread support, a CLONE method.
|
||||
|
||||
When field hashes are used, the basic structure remains the same.
|
||||
Each lexical hash will be made a field hash. The call to C<refaddr>
|
||||
can be omitted from the accessor methods. DESTROY and CLONE methods
|
||||
are not necessary.
|
||||
|
||||
If you have an existing inside-out class, simply making all hashes
|
||||
field hashes with no other change should make no difference. Through
|
||||
the calls to C<refaddr> or equivalent, the field hashes never get to
|
||||
see a reference and work like normal hashes. Your DESTROY (and
|
||||
CLONE) methods are still needed.
|
||||
|
||||
To make the field hashes kick in, it is easiest to redefine C<refaddr>
|
||||
as
|
||||
|
||||
sub refaddr { shift }
|
||||
|
||||
instead of importing it from C<Scalar::Util>. It should now be possible
|
||||
to disable DESTROY and CLONE. Note that while it isn't disabled,
|
||||
DESTROY will be called before the garbage collection of field hashes,
|
||||
so it will be invoked with a functional object and will continue to
|
||||
function.
|
||||
|
||||
It is not desirable to import the functions C<fieldhash> and/or
|
||||
C<fieldhashes> into every class that is going to use them. They
|
||||
are only used once to set up the class. When the class is up and running,
|
||||
these functions serve no more purpose.
|
||||
|
||||
If there are only a few field hashes to declare, it is simplest to
|
||||
|
||||
use Hash::Util::FieldHash;
|
||||
|
||||
early and call the functions qualified:
|
||||
|
||||
Hash::Util::FieldHash::fieldhash my %foo;
|
||||
|
||||
Otherwise, import the functions into a convenient package like
|
||||
C<HUF> or, more general, C<Aux>
|
||||
|
||||
{
|
||||
package Aux;
|
||||
use Hash::Util::FieldHash ':all';
|
||||
}
|
||||
|
||||
and call
|
||||
|
||||
Aux::fieldhash my %foo;
|
||||
|
||||
as needed.
|
||||
|
||||
=head2 Garbage-Collected Hashes
|
||||
|
||||
Garbage collection in a field hash means that entries will "spontaneously"
|
||||
disappear when the object that created them disappears. That must be
|
||||
borne in mind, especially when looping over a field hash. If anything
|
||||
you do inside the loop could cause an object to go out of scope, a
|
||||
random key may be deleted from the hash you are looping over. That
|
||||
can throw the loop iterator, so it's best to cache a consistent snapshot
|
||||
of the keys and/or values and loop over that. You will still have to
|
||||
check that a cached entry still exists when you get to it.
|
||||
|
||||
Garbage collection can be confusing when keys are created in a field hash
|
||||
from normal scalars as well as references. Once a reference is I<used> with
|
||||
a field hash, the entry will be collected, even if it was later overwritten
|
||||
with a plain scalar key (every positive integer is a candidate). This
|
||||
is true even if the original entry was deleted in the meantime. In fact,
|
||||
deletion from a field hash, and also a test for existence constitute
|
||||
I<use> in this sense and create a liability to delete the entry when
|
||||
the reference goes out of scope. If you happen to create an entry
|
||||
with an identical key from a string or integer, that will be collected
|
||||
instead. Thus, mixed use of references and plain scalars as field hash
|
||||
keys is not entirely supported.
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
The examples show a very simple class that implements a I<name>, consisting
|
||||
of a first and last name (no middle initial). The name class has four
|
||||
methods:
|
||||
|
||||
=over
|
||||
|
||||
=item * C<init()>
|
||||
|
||||
An object method that initializes the first and last name to its
|
||||
two arguments. If called as a class method, C<init()> creates an
|
||||
object in the given class and initializes that.
|
||||
|
||||
=item * C<first()>
|
||||
|
||||
Retrieve the first name
|
||||
|
||||
=item * C<last()>
|
||||
|
||||
Retrieve the last name
|
||||
|
||||
=item * C<name()>
|
||||
|
||||
Retrieve the full name, the first and last name joined by a blank.
|
||||
|
||||
=back
|
||||
|
||||
The examples show this class implemented with different levels of
|
||||
support by C<Hash::Util::FieldHash>. All supported combinations
|
||||
are shown. The difference between implementations is often quite
|
||||
small. The implementations are:
|
||||
|
||||
=over
|
||||
|
||||
=item * C<Name_hash>
|
||||
|
||||
A conventional (not inside-out) implementation where an object is
|
||||
a hash that stores the field values, without support by
|
||||
C<Hash::Util::FieldHash>. This implementation doesn't allow
|
||||
arbitrary inheritance.
|
||||
|
||||
=item * C<Name_id>
|
||||
|
||||
Inside-out implementation based on the C<id()> function. It needs
|
||||
a C<DESTROY> method. For thread support a C<CLONE> method (not shown)
|
||||
would also be needed. Instead of C<Hash::Util::FieldHash::id()> the
|
||||
function C<Scalar::Util::refaddr> could be used with very little
|
||||
functional difference. This is the basic pattern of an inside-out
|
||||
class.
|
||||
|
||||
=item * C<Name_idhash>
|
||||
|
||||
Idhash-based inside-out implementation. Like C<Name_id> it needs
|
||||
a C<DESTROY> method and would need C<CLONE> for thread support.
|
||||
|
||||
=item * C<Name_id_reg>
|
||||
|
||||
Inside-out implementation based on the C<id()> function with explicit
|
||||
object registry. No destructor is needed and objects are thread safe.
|
||||
|
||||
=item * C<Name_idhash_reg>
|
||||
|
||||
Idhash-based inside-out implementation with explicit object registry.
|
||||
No destructor is needed and objects are thread safe.
|
||||
|
||||
=item * C<Name_fieldhash>
|
||||
|
||||
FieldHash-based inside-out implementation. Object registry happens
|
||||
automatically. No destructor is needed and objects are thread safe.
|
||||
|
||||
=back
|
||||
|
||||
These examples are realized in the code below, which could be copied
|
||||
to a file F<Example.pm>.
|
||||
|
||||
=head2 Example 1
|
||||
|
||||
use strict; use warnings;
|
||||
|
||||
{
|
||||
package Name_hash; # standard implementation: the
|
||||
# object is a hash
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless {}, $obj unless ref $obj;
|
||||
$obj->{ first} = $first;
|
||||
$obj->{ last} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { shift()->{ first} }
|
||||
sub last { shift()->{ last} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
package Name_id;
|
||||
use Hash::Util::FieldHash qw(id);
|
||||
|
||||
my (%first, %last);
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless \ my $o, $obj unless ref $obj;
|
||||
$first{ id $obj} = $first;
|
||||
$last{ id $obj} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { $first{ id shift()} }
|
||||
sub last { $last{ id shift()} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
|
||||
sub DESTROY {
|
||||
my $id = id shift;
|
||||
delete $first{ $id};
|
||||
delete $last{ $id};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
package Name_idhash;
|
||||
use Hash::Util::FieldHash;
|
||||
|
||||
Hash::Util::FieldHash::idhashes( \ my (%first, %last) );
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless \ my $o, $obj unless ref $obj;
|
||||
$first{ $obj} = $first;
|
||||
$last{ $obj} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { $first{ shift()} }
|
||||
sub last { $last{ shift()} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
|
||||
sub DESTROY {
|
||||
my $n = shift;
|
||||
delete $first{ $n};
|
||||
delete $last{ $n};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
package Name_id_reg;
|
||||
use Hash::Util::FieldHash qw(id register);
|
||||
|
||||
my (%first, %last);
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless \ my $o, $obj unless ref $obj;
|
||||
register( $obj, \ (%first, %last) );
|
||||
$first{ id $obj} = $first;
|
||||
$last{ id $obj} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { $first{ id shift()} }
|
||||
sub last { $last{ id shift()} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
package Name_idhash_reg;
|
||||
use Hash::Util::FieldHash qw(register);
|
||||
|
||||
Hash::Util::FieldHash::idhashes \ my (%first, %last);
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless \ my $o, $obj unless ref $obj;
|
||||
register( $obj, \ (%first, %last) );
|
||||
$first{ $obj} = $first;
|
||||
$last{ $obj} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { $first{ shift()} }
|
||||
sub last { $last{ shift()} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
package Name_fieldhash;
|
||||
use Hash::Util::FieldHash;
|
||||
|
||||
Hash::Util::FieldHash::fieldhashes \ my (%first, %last);
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($first, $last) = @_;
|
||||
# create an object if called as class method
|
||||
$obj = bless \ my $o, $obj unless ref $obj;
|
||||
$first{ $obj} = $first;
|
||||
$last{ $obj} = $last;
|
||||
$obj;
|
||||
}
|
||||
|
||||
sub first { $first{ shift()} }
|
||||
sub last { $last{ shift()} }
|
||||
|
||||
sub name {
|
||||
my $n = shift;
|
||||
join ' ' => $n->first, $n->last;
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
To exercise the various implementations the script L<below|/"Example 2"> can
|
||||
be used.
|
||||
|
||||
It sets up a class C<Name> that is a mirror of one of the implementation
|
||||
classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines
|
||||
which implementation is run.
|
||||
|
||||
The script first verifies the function of the C<Name> class.
|
||||
|
||||
In the second step, the free inheritability of the implementation
|
||||
(or lack thereof) is demonstrated. For this purpose it constructs
|
||||
a class called C<NamedFile> which is a common subclass of C<Name> and
|
||||
the standard class C<IO::File>. This puts inheritability to the test
|
||||
because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile>
|
||||
should behave like a file opened for reading and also support the C<name()>
|
||||
method. This class juncture works with exception of the C<Name_hash>
|
||||
implementation, where object initialization fails because of the
|
||||
incompatibility of object bodies.
|
||||
|
||||
=head2 Example 2
|
||||
|
||||
use strict; use warnings; $| = 1;
|
||||
|
||||
use Example;
|
||||
|
||||
{
|
||||
package Name;
|
||||
use parent 'Name_id'; # define here which implementation to run
|
||||
}
|
||||
|
||||
|
||||
# Verify that the base package works
|
||||
my $n = Name->init(qw(Albert Einstein));
|
||||
print $n->name, "\n";
|
||||
print "\n";
|
||||
|
||||
# Create a named file handle (See definition below)
|
||||
my $nf = NamedFile->init(qw(/tmp/x Filomena File));
|
||||
# use as a file handle...
|
||||
for ( 1 .. 3 ) {
|
||||
my $l = <$nf>;
|
||||
print "line $_: $l";
|
||||
}
|
||||
# ...and as a Name object
|
||||
print "...brought to you by ", $nf->name, "\n";
|
||||
exit;
|
||||
|
||||
|
||||
# Definition of NamedFile
|
||||
package NamedFile;
|
||||
use parent 'Name';
|
||||
use parent 'IO::File';
|
||||
|
||||
sub init {
|
||||
my $obj = shift;
|
||||
my ($file, $first, $last) = @_;
|
||||
$obj = $obj->IO::File::new() unless ref $obj;
|
||||
$obj->open($file) or die "Can't read '$file': $!";
|
||||
$obj->Name::init($first, $last);
|
||||
}
|
||||
__END__
|
||||
|
||||
|
||||
=head1 GUTS
|
||||
|
||||
To make C<Hash::Util::FieldHash> work, there were two changes to
|
||||
F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes,
|
||||
and weak references now call uvar C<get> magic after a weakref has been
|
||||
cleared. The first feature is used to make field hashes intercept
|
||||
their keys upon access. The second one triggers garbage collection.
|
||||
|
||||
=head2 The C<PERL_MAGIC_uvar> interface for hashes
|
||||
|
||||
C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and
|
||||
C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which
|
||||
defines the interface. The call happens for hashes with "uvar" magic
|
||||
if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set>
|
||||
fields. Hashes are unaffected if (and as long as) these fields
|
||||
hold different values.
|
||||
|
||||
Upon the call, the C<mg_obj> field will hold the hash key to be accessed.
|
||||
Upon return, the C<SV*> value in C<mg_obj> will be used in place of the
|
||||
original key in the hash access. The integer index value in the first
|
||||
parameter will be the C<action> value from C<hv_fetch_common>, or -1
|
||||
if the call is from C<hv_delete_common>.
|
||||
|
||||
This is a template for a function suitable for the C<uf_val> field in
|
||||
a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index>
|
||||
fields are irrelevant.
|
||||
|
||||
IV watch_key(pTHX_ IV action, SV* field) {
|
||||
MAGIC* mg = mg_find(field, PERL_MAGIC_uvar);
|
||||
SV* keysv = mg->mg_obj;
|
||||
/* Do whatever you need to. If you decide to
|
||||
supply a different key newkey, return it like this
|
||||
*/
|
||||
sv_2mortal(newkey);
|
||||
mg->mg_obj = newkey;
|
||||
return 0;
|
||||
}
|
||||
|
||||
=head2 Weakrefs call uvar magic
|
||||
|
||||
When a weak reference is stored in an C<SV> that has "uvar" magic, C<set>
|
||||
magic is called after the reference has gone stale. This hook can be
|
||||
used to trigger further garbage-collection activities associated with
|
||||
the referenced object.
|
||||
|
||||
=head2 How field hashes work
|
||||
|
||||
The three features of key hashes, I<key replacement>, I<thread support>,
|
||||
and I<garbage collection> are supported by a data structure called
|
||||
the I<object registry>. This is a private hash where every object
|
||||
is stored. An "object" in this sense is any reference (blessed or
|
||||
unblessed) that has been used as a field hash key.
|
||||
|
||||
The object registry keeps track of references that have been used as
|
||||
field hash keys. The keys are generated from the reference address
|
||||
like in a field hash (though the registry isn't a field hash). Each
|
||||
value is a weak copy of the original reference, stored in an C<SV> that
|
||||
is itself magical (C<PERL_MAGIC_uvar> again). The magical structure
|
||||
holds a list (another hash, really) of field hashes that the reference
|
||||
has been used with. When the weakref becomes stale, the magic is
|
||||
activated and uses the list to delete the reference from all field
|
||||
hashes it has been used with. After that, the entry is removed from
|
||||
the object registry itself. Implicitly, that frees the magic structure
|
||||
and the storage it has been using.
|
||||
|
||||
Whenever a reference is used as a field hash key, the object registry
|
||||
is checked and a new entry is made if necessary. The field hash is
|
||||
then added to the list of fields this reference has used.
|
||||
|
||||
The object registry is also used to repair a field hash after thread
|
||||
cloning. Here, the entire object registry is processed. For every
|
||||
reference found there, the field hashes it has used are visited and
|
||||
the entry is updated.
|
||||
|
||||
=head2 Internal function Hash::Util::FieldHash::_fieldhash
|
||||
|
||||
# test if %hash is a field hash
|
||||
my $result = _fieldhash \ %hash, 0;
|
||||
|
||||
# make %hash a field hash
|
||||
my $result = _fieldhash \ %hash, 1;
|
||||
|
||||
C<_fieldhash> is the internal function used to create field hashes.
|
||||
It takes two arguments, a hashref and a mode. If the mode is boolean
|
||||
false, the hash is not changed but tested if it is a field hash. If
|
||||
the hash isn't a field hash the return value is boolean false. If it
|
||||
is, the return value indicates the mode of field hash. When called with
|
||||
a boolean true mode, it turns the given hash into a field hash of this
|
||||
mode, returning the mode of the created field hash. C<_fieldhash>
|
||||
does not erase the given hash.
|
||||
|
||||
Currently there is only one type of field hash, and only the boolean
|
||||
value of the mode makes a difference, but that may change.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Anno Siegel (ANNO) wrote the xs code and the changes in perl proper
|
||||
Jerry Hedden (JDHEDDEN) made it faster
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (C) 2006-2007 by (Anno Siegel)
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself, either Perl version 5.8.7 or,
|
||||
at your option, any later version of Perl 5 you may have available.
|
||||
|
||||
=cut
|
||||
Reference in New Issue
Block a user