Initial Commit
This commit is contained in:
844
database/perl/lib/Hash/Util.pm
Normal file
844
database/perl/lib/Hash/Util.pm
Normal file
@@ -0,0 +1,844 @@
|
||||
package Hash::Util;
|
||||
|
||||
require 5.007003;
|
||||
use strict;
|
||||
use Carp;
|
||||
use warnings;
|
||||
no warnings 'uninitialized';
|
||||
use warnings::register;
|
||||
use Scalar::Util qw(reftype);
|
||||
|
||||
require Exporter;
|
||||
our @ISA = qw(Exporter);
|
||||
our @EXPORT_OK = qw(
|
||||
fieldhash fieldhashes
|
||||
|
||||
all_keys
|
||||
lock_keys unlock_keys
|
||||
lock_value unlock_value
|
||||
lock_hash unlock_hash
|
||||
lock_keys_plus
|
||||
hash_locked hash_unlocked
|
||||
hashref_locked hashref_unlocked
|
||||
hidden_keys legal_keys
|
||||
|
||||
lock_ref_keys unlock_ref_keys
|
||||
lock_ref_value unlock_ref_value
|
||||
lock_hashref unlock_hashref
|
||||
lock_ref_keys_plus
|
||||
hidden_ref_keys legal_ref_keys
|
||||
|
||||
hash_seed hash_value hv_store
|
||||
bucket_stats bucket_stats_formatted bucket_info bucket_array
|
||||
lock_hash_recurse unlock_hash_recurse
|
||||
lock_hashref_recurse unlock_hashref_recurse
|
||||
|
||||
hash_traversal_mask
|
||||
|
||||
bucket_ratio
|
||||
used_buckets
|
||||
num_buckets
|
||||
);
|
||||
BEGIN {
|
||||
# make sure all our XS routines are available early so their prototypes
|
||||
# are correctly applied in the following code.
|
||||
our $VERSION = '0.23';
|
||||
require XSLoader;
|
||||
XSLoader::load();
|
||||
}
|
||||
|
||||
sub import {
|
||||
my $class = shift;
|
||||
if ( grep /fieldhash/, @_ ) {
|
||||
require Hash::Util::FieldHash;
|
||||
Hash::Util::FieldHash->import(':all'); # for re-export
|
||||
}
|
||||
unshift @_, $class;
|
||||
goto &Exporter::import;
|
||||
}
|
||||
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Hash::Util - A selection of general-utility hash subroutines
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
# Restricted hashes
|
||||
|
||||
use Hash::Util qw(
|
||||
fieldhash fieldhashes
|
||||
|
||||
all_keys
|
||||
lock_keys unlock_keys
|
||||
lock_value unlock_value
|
||||
lock_hash unlock_hash
|
||||
lock_keys_plus
|
||||
hash_locked hash_unlocked
|
||||
hashref_locked hashref_unlocked
|
||||
hidden_keys legal_keys
|
||||
|
||||
lock_ref_keys unlock_ref_keys
|
||||
lock_ref_value unlock_ref_value
|
||||
lock_hashref unlock_hashref
|
||||
lock_ref_keys_plus
|
||||
hidden_ref_keys legal_ref_keys
|
||||
|
||||
hash_seed hash_value hv_store
|
||||
bucket_stats bucket_info bucket_array
|
||||
lock_hash_recurse unlock_hash_recurse
|
||||
lock_hashref_recurse unlock_hashref_recurse
|
||||
|
||||
hash_traversal_mask
|
||||
);
|
||||
|
||||
%hash = (foo => 42, bar => 23);
|
||||
# Ways to restrict a hash
|
||||
lock_keys(%hash);
|
||||
lock_keys(%hash, @keyset);
|
||||
lock_keys_plus(%hash, @additional_keys);
|
||||
|
||||
# Ways to inspect the properties of a restricted hash
|
||||
my @legal = legal_keys(%hash);
|
||||
my @hidden = hidden_keys(%hash);
|
||||
my $ref = all_keys(%hash,@keys,@hidden);
|
||||
my $is_locked = hash_locked(%hash);
|
||||
|
||||
# Remove restrictions on the hash
|
||||
unlock_keys(%hash);
|
||||
|
||||
# Lock individual values in a hash
|
||||
lock_value (%hash, 'foo');
|
||||
unlock_value(%hash, 'foo');
|
||||
|
||||
# Ways to change the restrictions on both keys and values
|
||||
lock_hash (%hash);
|
||||
unlock_hash(%hash);
|
||||
|
||||
my $hashes_are_randomised = hash_seed() !~ /^\0+$/;
|
||||
|
||||
my $int_hash_value = hash_value( 'string' );
|
||||
|
||||
my $mask= hash_traversal_mask(%hash);
|
||||
|
||||
hash_traversal_mask(%hash,1234);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
|
||||
for manipulating hashes that don't really warrant a keyword.
|
||||
|
||||
C<Hash::Util> contains a set of functions that support
|
||||
L<restricted hashes|/"Restricted hashes">. These are described in
|
||||
this document. C<Hash::Util::FieldHash> contains an (unrelated)
|
||||
set of functions that support the use of hashes in
|
||||
I<inside-out classes>, described in L<Hash::Util::FieldHash>.
|
||||
|
||||
By default C<Hash::Util> does not export anything.
|
||||
|
||||
=head2 Restricted hashes
|
||||
|
||||
5.8.0 introduces the ability to restrict a hash to a certain set of
|
||||
keys. No keys outside of this set can be added. It also introduces
|
||||
the ability to lock an individual key so it cannot be deleted and the
|
||||
ability to ensure that an individual value cannot be changed.
|
||||
|
||||
This is intended to largely replace the deprecated pseudo-hashes.
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<lock_keys>
|
||||
|
||||
=item B<unlock_keys>
|
||||
|
||||
lock_keys(%hash);
|
||||
lock_keys(%hash, @keys);
|
||||
|
||||
Restricts the given %hash's set of keys to @keys. If @keys is not
|
||||
given it restricts it to its current keyset. No more keys can be
|
||||
added. delete() and exists() will still work, but will not alter
|
||||
the set of allowed keys. B<Note>: the current implementation prevents
|
||||
the hash from being bless()ed while it is in a locked state. Any attempt
|
||||
to do so will raise an exception. Of course you can still bless()
|
||||
the hash before you call lock_keys() so this shouldn't be a problem.
|
||||
|
||||
unlock_keys(%hash);
|
||||
|
||||
Removes the restriction on the %hash's keyset.
|
||||
|
||||
B<Note> that if any of the values of the hash have been locked they will not
|
||||
be unlocked after this sub executes.
|
||||
|
||||
Both routines return a reference to the hash operated on.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_ref_keys {
|
||||
my($hash, @keys) = @_;
|
||||
|
||||
_clear_placeholders(%$hash);
|
||||
if( @keys ) {
|
||||
my %keys = map { ($_ => 1) } @keys;
|
||||
my %original_keys = map { ($_ => 1) } keys %$hash;
|
||||
foreach my $k (keys %original_keys) {
|
||||
croak "Hash has key '$k' which is not in the new key set"
|
||||
unless $keys{$k};
|
||||
}
|
||||
|
||||
foreach my $k (@keys) {
|
||||
$hash->{$k} = undef unless exists $hash->{$k};
|
||||
}
|
||||
Internals::SvREADONLY %$hash, 1;
|
||||
|
||||
foreach my $k (@keys) {
|
||||
delete $hash->{$k} unless $original_keys{$k};
|
||||
}
|
||||
}
|
||||
else {
|
||||
Internals::SvREADONLY %$hash, 1;
|
||||
}
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub unlock_ref_keys {
|
||||
my $hash = shift;
|
||||
|
||||
Internals::SvREADONLY %$hash, 0;
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_keys (\%;@) { lock_ref_keys(@_) }
|
||||
sub unlock_keys (\%) { unlock_ref_keys(@_) }
|
||||
|
||||
#=item B<_clear_placeholders>
|
||||
#
|
||||
# This function removes any placeholder keys from a hash. See Perl_hv_clear_placeholders()
|
||||
# in hv.c for what it does exactly. It is currently exposed as XS by universal.c and
|
||||
# injected into the Hash::Util namespace.
|
||||
#
|
||||
# It is not intended for use outside of this module, and may be changed
|
||||
# or removed without notice or deprecation cycle.
|
||||
#
|
||||
#=cut
|
||||
#
|
||||
# sub _clear_placeholders {} # just in case someone searches...
|
||||
|
||||
=item B<lock_keys_plus>
|
||||
|
||||
lock_keys_plus(%hash,@additional_keys)
|
||||
|
||||
Similar to C<lock_keys()>, with the difference being that the optional key list
|
||||
specifies keys that may or may not be already in the hash. Essentially this is
|
||||
an easier way to say
|
||||
|
||||
lock_keys(%hash,@additional_keys,keys %hash);
|
||||
|
||||
Returns a reference to %hash
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub lock_ref_keys_plus {
|
||||
my ($hash,@keys) = @_;
|
||||
my @delete;
|
||||
_clear_placeholders(%$hash);
|
||||
foreach my $key (@keys) {
|
||||
unless (exists($hash->{$key})) {
|
||||
$hash->{$key}=undef;
|
||||
push @delete,$key;
|
||||
}
|
||||
}
|
||||
Internals::SvREADONLY(%$hash,1);
|
||||
delete @{$hash}{@delete};
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
|
||||
|
||||
|
||||
=item B<lock_value>
|
||||
|
||||
=item B<unlock_value>
|
||||
|
||||
lock_value (%hash, $key);
|
||||
unlock_value(%hash, $key);
|
||||
|
||||
Locks and unlocks the value for an individual key of a hash. The value of a
|
||||
locked key cannot be changed.
|
||||
|
||||
Unless %hash has already been locked the key/value could be deleted
|
||||
regardless of this setting.
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_ref_value {
|
||||
my($hash, $key) = @_;
|
||||
# I'm doubtful about this warning, as it seems not to be true.
|
||||
# Marking a value in the hash as RO is useful, regardless
|
||||
# of the status of the hash itself.
|
||||
carp "Cannot usefully lock values in an unlocked hash"
|
||||
if !Internals::SvREADONLY(%$hash) && warnings::enabled;
|
||||
Internals::SvREADONLY $hash->{$key}, 1;
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub unlock_ref_value {
|
||||
my($hash, $key) = @_;
|
||||
Internals::SvREADONLY $hash->{$key}, 0;
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub lock_value (\%$) { lock_ref_value(@_) }
|
||||
sub unlock_value (\%$) { unlock_ref_value(@_) }
|
||||
|
||||
|
||||
=item B<lock_hash>
|
||||
|
||||
=item B<unlock_hash>
|
||||
|
||||
lock_hash(%hash);
|
||||
|
||||
lock_hash() locks an entire hash, making all keys and values read-only.
|
||||
No value can be changed, no keys can be added or deleted.
|
||||
|
||||
unlock_hash(%hash);
|
||||
|
||||
unlock_hash() does the opposite of lock_hash(). All keys and values
|
||||
are made writable. All values can be changed and keys can be added
|
||||
and deleted.
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_hashref {
|
||||
my $hash = shift;
|
||||
|
||||
lock_ref_keys($hash);
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
Internals::SvREADONLY($value,1);
|
||||
}
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub unlock_hashref {
|
||||
my $hash = shift;
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
Internals::SvREADONLY($value, 0);
|
||||
}
|
||||
|
||||
unlock_ref_keys($hash);
|
||||
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_hash (\%) { lock_hashref(@_) }
|
||||
sub unlock_hash (\%) { unlock_hashref(@_) }
|
||||
|
||||
=item B<lock_hash_recurse>
|
||||
|
||||
=item B<unlock_hash_recurse>
|
||||
|
||||
lock_hash_recurse(%hash);
|
||||
|
||||
lock_hash() locks an entire hash and any hashes it references recursively,
|
||||
making all keys and values read-only. No value can be changed, no keys can
|
||||
be added or deleted.
|
||||
|
||||
This method B<only> recurses into hashes that are referenced by another hash.
|
||||
Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
|
||||
Hashes (HoAoH) will only have the top hash restricted.
|
||||
|
||||
unlock_hash_recurse(%hash);
|
||||
|
||||
unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and
|
||||
values are made writable. All values can be changed and keys can be added
|
||||
and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
|
||||
|
||||
Returns a reference to the %hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub lock_hashref_recurse {
|
||||
my $hash = shift;
|
||||
|
||||
lock_ref_keys($hash);
|
||||
foreach my $value (values %$hash) {
|
||||
my $type = reftype($value);
|
||||
if (defined($type) and $type eq 'HASH') {
|
||||
lock_hashref_recurse($value);
|
||||
}
|
||||
Internals::SvREADONLY($value,1);
|
||||
}
|
||||
return $hash
|
||||
}
|
||||
|
||||
sub unlock_hashref_recurse {
|
||||
my $hash = shift;
|
||||
|
||||
foreach my $value (values %$hash) {
|
||||
my $type = reftype($value);
|
||||
if (defined($type) and $type eq 'HASH') {
|
||||
unlock_hashref_recurse($value);
|
||||
}
|
||||
Internals::SvREADONLY($value,0);
|
||||
}
|
||||
unlock_ref_keys($hash);
|
||||
return $hash;
|
||||
}
|
||||
|
||||
sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) }
|
||||
sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
|
||||
|
||||
=item B<hashref_locked>
|
||||
|
||||
=item B<hash_locked>
|
||||
|
||||
hashref_locked(\%hash) and print "Hash is locked!\n";
|
||||
hash_locked(%hash) and print "Hash is locked!\n";
|
||||
|
||||
Returns true if the hash and its keys are locked.
|
||||
|
||||
=cut
|
||||
|
||||
sub hashref_locked {
|
||||
my $hash=shift;
|
||||
Internals::SvREADONLY(%$hash);
|
||||
}
|
||||
|
||||
sub hash_locked(\%) { hashref_locked(@_) }
|
||||
|
||||
=item B<hashref_unlocked>
|
||||
|
||||
=item B<hash_unlocked>
|
||||
|
||||
hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
|
||||
hash_unlocked(%hash) and print "Hash is unlocked!\n";
|
||||
|
||||
Returns true if the hash and its keys are unlocked.
|
||||
|
||||
=cut
|
||||
|
||||
sub hashref_unlocked {
|
||||
my $hash=shift;
|
||||
!Internals::SvREADONLY(%$hash);
|
||||
}
|
||||
|
||||
sub hash_unlocked(\%) { hashref_unlocked(@_) }
|
||||
|
||||
=for demerphqs_editor
|
||||
sub legal_ref_keys{}
|
||||
sub hidden_ref_keys{}
|
||||
sub all_keys{}
|
||||
|
||||
=cut
|
||||
|
||||
sub legal_keys(\%) { legal_ref_keys(@_) }
|
||||
sub hidden_keys(\%){ hidden_ref_keys(@_) }
|
||||
|
||||
=item B<legal_keys>
|
||||
|
||||
my @keys = legal_keys(%hash);
|
||||
|
||||
Returns the list of the keys that are legal in a restricted hash.
|
||||
In the case of an unrestricted hash this is identical to calling
|
||||
keys(%hash).
|
||||
|
||||
=item B<hidden_keys>
|
||||
|
||||
my @keys = hidden_keys(%hash);
|
||||
|
||||
Returns the list of the keys that are legal in a restricted hash but
|
||||
do not have a value associated to them. Thus if 'foo' is a
|
||||
"hidden" key of the %hash it will return false for both C<defined>
|
||||
and C<exists> tests.
|
||||
|
||||
In the case of an unrestricted hash this will return an empty list.
|
||||
|
||||
B<NOTE> this is an experimental feature that is heavily dependent
|
||||
on the current implementation of restricted hashes. Should the
|
||||
implementation change, this routine may become meaningless, in which
|
||||
case it will return an empty list.
|
||||
|
||||
=item B<all_keys>
|
||||
|
||||
all_keys(%hash,@keys,@hidden);
|
||||
|
||||
Populates the arrays @keys with the all the keys that would pass
|
||||
an C<exists> tests, and populates @hidden with the remaining legal
|
||||
keys that have not been utilized.
|
||||
|
||||
Returns a reference to the hash.
|
||||
|
||||
In the case of an unrestricted hash this will be equivalent to
|
||||
|
||||
$ref = do {
|
||||
@keys = keys %hash;
|
||||
@hidden = ();
|
||||
\%hash
|
||||
};
|
||||
|
||||
B<NOTE> this is an experimental feature that is heavily dependent
|
||||
on the current implementation of restricted hashes. Should the
|
||||
implementation change this routine may become meaningless in which
|
||||
case it will behave identically to how it would behave on an
|
||||
unrestricted hash.
|
||||
|
||||
=item B<hash_seed>
|
||||
|
||||
my $hash_seed = hash_seed();
|
||||
|
||||
hash_seed() returns the seed bytes used to randomise hash ordering.
|
||||
|
||||
B<Note that the hash seed is sensitive information>: by knowing it one
|
||||
can craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the hash seed> to people who don't need to know it.
|
||||
See also L<perlrun/PERL_HASH_SEED_DEBUG>.
|
||||
|
||||
Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
|
||||
which may be of nearly any size as determined by the hash function your
|
||||
Perl has been built with. Possible sizes may be but are not limited to
|
||||
4 bytes (for most hash algorithms) and 16 bytes (for siphash).
|
||||
|
||||
=item B<hash_value>
|
||||
|
||||
my $hash_value = hash_value($string);
|
||||
|
||||
hash_value() returns the current perl's internal hash value for a given
|
||||
string.
|
||||
|
||||
Returns a 32 bit integer representing the hash value of the string passed
|
||||
in. This value is only reliable for the lifetime of the process. It may
|
||||
be different depending on invocation, environment variables, perl version,
|
||||
architectures, and build options.
|
||||
|
||||
B<Note that the hash value of a given string is sensitive information>:
|
||||
by knowing it one can deduce the hash seed which in turn can allow one to
|
||||
craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the hash value of a string> to people who don't need to
|
||||
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
|
||||
|
||||
=item B<bucket_info>
|
||||
|
||||
Return a set of basic information about a hash.
|
||||
|
||||
my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
|
||||
|
||||
Fields are as follows:
|
||||
|
||||
0: Number of keys in the hash
|
||||
1: Number of buckets in the hash
|
||||
2: Number of used buckets in the hash
|
||||
rest : list of counts, Kth element is the number of buckets
|
||||
with K keys in it.
|
||||
|
||||
See also bucket_stats() and bucket_array().
|
||||
|
||||
=item B<bucket_stats>
|
||||
|
||||
Returns a list of statistics about a hash.
|
||||
|
||||
my ($keys, $buckets, $used, $quality, $utilization_ratio,
|
||||
$collision_pct, $mean, $stddev, @length_counts)
|
||||
= bucket_stats($hashref);
|
||||
|
||||
Fields are as follows:
|
||||
|
||||
0: Number of keys in the hash
|
||||
1: Number of buckets in the hash
|
||||
2: Number of used buckets in the hash
|
||||
3: Hash Quality Score
|
||||
4: Percent of buckets used
|
||||
5: Percent of keys which are in collision
|
||||
6: Mean bucket length of occupied buckets
|
||||
7: Standard Deviation of bucket lengths of occupied buckets
|
||||
rest : list of counts, Kth element is the number of buckets
|
||||
with K keys in it.
|
||||
|
||||
See also bucket_info() and bucket_array().
|
||||
|
||||
Note that Hash Quality Score would be 1 for an ideal hash, numbers
|
||||
close to and below 1 indicate good hashing, and number significantly
|
||||
above indicate a poor score. In practice it should be around 0.95 to 1.05.
|
||||
It is defined as:
|
||||
|
||||
$score= sum( $count[$length] * ($length * ($length + 1) / 2) )
|
||||
/
|
||||
( ( $keys / 2 * $buckets ) *
|
||||
( $keys + ( 2 * $buckets ) - 1 ) )
|
||||
|
||||
The formula is from the Red Dragon book (reformulated to use the data available)
|
||||
and is documented at L<http://www.strchr.com/hash_functions>
|
||||
|
||||
=item B<bucket_array>
|
||||
|
||||
my $array= bucket_array(\%hash);
|
||||
|
||||
Returns a packed representation of the bucket array associated with a hash. Each element
|
||||
of the array is either an integer K, in which case it represents K empty buckets, or
|
||||
a reference to another array which contains the keys that are in that bucket.
|
||||
|
||||
B<Note that the information returned by bucket_array is sensitive information>:
|
||||
by knowing it one can directly attack perl's hash function which in turn may allow
|
||||
one to craft a denial-of-service attack against Perl code, even remotely,
|
||||
see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
|
||||
B<Do not disclose the output of this function> to people who don't need to
|
||||
know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
|
||||
for debugging and diagnostics purposes only, it is hard to imagine a reason why it
|
||||
would be used in production code.
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub bucket_stats {
|
||||
my ($hash) = @_;
|
||||
my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
|
||||
my $sum;
|
||||
my $score;
|
||||
for (1 .. $#length_counts) {
|
||||
$sum += ($length_counts[$_] * $_);
|
||||
$score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
|
||||
}
|
||||
$score = $score /
|
||||
(( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
|
||||
if $keys;
|
||||
my ($mean, $stddev)= (0, 0);
|
||||
if ($used) {
|
||||
$mean= $sum / $used;
|
||||
$sum= 0;
|
||||
$sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts;
|
||||
|
||||
$stddev= sqrt($sum/$used);
|
||||
}
|
||||
return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
|
||||
}
|
||||
|
||||
=item B<bucket_stats_formatted>
|
||||
|
||||
print bucket_stats_formatted($hashref);
|
||||
|
||||
Return a formatted report of the information returned by bucket_stats().
|
||||
An example report looks like this:
|
||||
|
||||
Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good)
|
||||
Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00%
|
||||
Chain Length - mean: 1.52 stddev: 0.66
|
||||
Buckets 64 [0000000000000000000000000000000111111111111111111122222222222333]
|
||||
Len 0 Pct: 48.44 [###############################]
|
||||
Len 1 Pct: 29.69 [###################]
|
||||
Len 2 Pct: 17.19 [###########]
|
||||
Len 3 Pct: 4.69 [###]
|
||||
Keys 50 [11111111111111111111111111111111122222222222222333]
|
||||
Pos 1 Pct: 66.00 [#################################]
|
||||
Pos 2 Pct: 28.00 [##############]
|
||||
Pos 3 Pct: 6.00 [###]
|
||||
|
||||
The first set of stats gives some summary statistical information,
|
||||
including the quality score translated into "Good", "Poor" and "Bad",
|
||||
(score<=1.05, score<=1.2, score>1.2). See the documentation in
|
||||
bucket_stats() for more details.
|
||||
|
||||
The two sets of barcharts give stats and a visual indication of performance
|
||||
of the hash.
|
||||
|
||||
The first gives data on bucket chain lengths and provides insight on how
|
||||
much work a fetch *miss* will take. In this case we have to inspect every item
|
||||
in a bucket before we can be sure the item is not in the list. The performance
|
||||
for an insert is equivalent to this case, as is a delete where the item
|
||||
is not in the hash.
|
||||
|
||||
The second gives data on how many keys are at each depth in the chain, and
|
||||
gives an idea of how much work a fetch *hit* will take. The performance for
|
||||
an update or delete of an item in the hash is equivalent to this case.
|
||||
|
||||
Note that these statistics are summary only. Actual performance will depend
|
||||
on real hit/miss ratios accessing the hash. If you are concerned by hit ratios
|
||||
you are recommended to "oversize" your hash by using something like:
|
||||
|
||||
keys(%hash)= keys(%hash) << $k;
|
||||
|
||||
With $k chosen carefully, and likely to be a small number like 1 or 2. In
|
||||
theory the larger the bucket array the less chance of collision.
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub _bucket_stats_formatted_bars {
|
||||
my ($total, $ary, $start_idx, $title, $row_title)= @_;
|
||||
|
||||
my $return = "";
|
||||
my $max_width= $total > 64 ? 64 : $total;
|
||||
my $bar_width= $max_width / $total;
|
||||
|
||||
my $str= "";
|
||||
if ( @$ary < 10) {
|
||||
for my $idx ($start_idx .. $#$ary) {
|
||||
$str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width));
|
||||
}
|
||||
} else {
|
||||
$str= "-" x $max_width;
|
||||
}
|
||||
$return .= sprintf "%-7s %6d [%s]\n",$title, $total, $str;
|
||||
|
||||
foreach my $idx ($start_idx .. $#$ary) {
|
||||
$return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n",
|
||||
$row_title,
|
||||
$idx,
|
||||
$ary->[$idx] / $total * 100,
|
||||
$ary->[$idx],
|
||||
"#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)),
|
||||
;
|
||||
}
|
||||
return $return;
|
||||
}
|
||||
|
||||
sub bucket_stats_formatted {
|
||||
my ($hashref)= @_;
|
||||
my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct,
|
||||
$mean, $stddev, @length_counts) = bucket_stats($hashref);
|
||||
|
||||
my $return= sprintf "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n"
|
||||
. "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n"
|
||||
. "Chain Length - mean: %.2f stddev: %.2f\n",
|
||||
$keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad",
|
||||
$utilization_ratio * 100,
|
||||
$keys/$buckets * 100,
|
||||
$collision_pct * 100,
|
||||
$mean, $stddev;
|
||||
|
||||
my @key_depth;
|
||||
$key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 )
|
||||
for reverse 1 .. $#length_counts;
|
||||
|
||||
if ($keys) {
|
||||
$return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len");
|
||||
$return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos");
|
||||
}
|
||||
return $return
|
||||
}
|
||||
|
||||
=item B<hv_store>
|
||||
|
||||
my $sv = 0;
|
||||
hv_store(%hash,$key,$sv) or die "Failed to alias!";
|
||||
$hash{$key} = 1;
|
||||
print $sv; # prints 1
|
||||
|
||||
Stores an alias to a variable in a hash instead of copying the value.
|
||||
|
||||
=item B<hash_traversal_mask>
|
||||
|
||||
As of Perl 5.18 every hash has its own hash traversal order, and this order
|
||||
changes every time a new element is inserted into the hash. This functionality
|
||||
is provided by maintaining an unsigned integer mask (U32) which is xor'ed
|
||||
with the actual bucket id during a traversal of the hash buckets using keys(),
|
||||
values() or each().
|
||||
|
||||
You can use this subroutine to get and set the traversal mask for a specific
|
||||
hash. Setting the mask ensures that a given hash will produce the same key
|
||||
order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
|
||||
the same key order for the same hash seed and traversal mask, items that
|
||||
collide into one bucket may have different orders regardless of this setting.
|
||||
|
||||
=item B<bucket_ratio>
|
||||
|
||||
This function behaves the same way that scalar(%hash) behaved prior to
|
||||
Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
|
||||
hash method, if untied then if the hash is empty it return 0, otherwise it
|
||||
returns a string containing the number of used buckets in the hash,
|
||||
followed by a slash, followed by the total number of buckets in the hash.
|
||||
|
||||
my %hash=("foo"=>1);
|
||||
print Hash::Util::bucket_ratio(%hash); # prints "1/8"
|
||||
|
||||
=item B<used_buckets>
|
||||
|
||||
This function returns the count of used buckets in the hash. It is expensive
|
||||
to calculate and the value is NOT cached, so avoid use of this function
|
||||
in production code.
|
||||
|
||||
=item B<num_buckets>
|
||||
|
||||
This function returns the total number of buckets the hash holds, or would
|
||||
hold if the array were created. (When a hash is freshly created the array
|
||||
may not be allocated even though this value will be non-zero.)
|
||||
|
||||
=back
|
||||
|
||||
=head2 Operating on references to hashes.
|
||||
|
||||
Most subroutines documented in this module have equivalent versions
|
||||
that operate on references to hashes instead of native hashes.
|
||||
The following is a list of these subs. They are identical except
|
||||
in name and in that instead of taking a %hash they take a $hashref,
|
||||
and additionally are not prototyped.
|
||||
|
||||
=over 4
|
||||
|
||||
=item lock_ref_keys
|
||||
|
||||
=item unlock_ref_keys
|
||||
|
||||
=item lock_ref_keys_plus
|
||||
|
||||
=item lock_ref_value
|
||||
|
||||
=item unlock_ref_value
|
||||
|
||||
=item lock_hashref
|
||||
|
||||
=item unlock_hashref
|
||||
|
||||
=item lock_hashref_recurse
|
||||
|
||||
=item unlock_hashref_recurse
|
||||
|
||||
=item hash_ref_unlocked
|
||||
|
||||
=item legal_ref_keys
|
||||
|
||||
=item hidden_ref_keys
|
||||
|
||||
=back
|
||||
|
||||
=head1 CAVEATS
|
||||
|
||||
Note that the trapping of the restricted operations is not atomic:
|
||||
for example
|
||||
|
||||
eval { %hash = (illegal_key => 1) }
|
||||
|
||||
leaves the C<%hash> empty rather than with its original contents.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
The interface exposed by this module is very close to the current
|
||||
implementation of restricted hashes. Over time it is expected that
|
||||
this behavior will be extended and the interface abstracted further.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael G Schwern <schwern@pobox.com> on top of code by Nick
|
||||
Ing-Simmons and Jeffrey Friedl.
|
||||
|
||||
hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
|
||||
|
||||
Additional code by Yves Orton.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
|
||||
|
||||
L<Hash::Util::FieldHash>.
|
||||
|
||||
=cut
|
||||
|
||||
1;
|
||||
Reference in New Issue
Block a user