Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/lib/Locale/Maketext.pm
+++ b/database/perl/lib/Locale/Maketext.pm
@@ -0,0 +1,859 @@
+package Locale::Maketext;
+use strict;
+our $USE_LITERALS;
+use Carp ();
+use I18N::LangTags ();
+use I18N::LangTags::Detect ();
+
+#--------------------------------------------------------------------------
+
+BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
+# define the constant 'DEBUG' at compile-time
+
+# turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
+#    use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
+BEGIN {
+
+    # if we have it || we can load it
+    if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) {
+        utf8->import();
+        DEBUG and warn " utf8 on for _compile()\n";
+    }
+    else {
+        DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
+    }
+}
+
+
+our $VERSION = '1.29';
+our @ISA = ();
+
+our $MATCH_SUPERS = 1;
+our $MATCH_SUPERS_TIGHTLY = 1;
+our $USING_LANGUAGE_TAGS  = 1;
+# Turning this off is somewhat of a security risk in that little or no
+# checking will be done on the legality of tokens passed to the
+# eval("use $module_name") in _try_use.  If you turn this off, you have
+# to do your own taint checking.
+
+$USE_LITERALS = 1 unless defined $USE_LITERALS;
+# a hint for compiling bracket-notation things.
+
+my %isa_scan = ();
+
+###########################################################################
+
+sub quant {
+    my($handle, $num, @forms) = @_;
+
+    return $num if @forms == 0; # what should this mean?
+    return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
+
+    # Normal case:
+    # Note that the formatting of $num is preserved.
+    return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
+    # Most human languages put the number phrase before the qualified phrase.
+}
+
+
+sub numerate {
+    # return this lexical item in a form appropriate to this number
+    my($handle, $num, @forms) = @_;
+    my $s = ($num == 1);
+
+    return '' unless @forms;
+    if(@forms == 1) { # only the headword form specified
+        return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
+    }
+    else { # sing and plural were specified
+        return $s ? $forms[0] : $forms[1];
+    }
+}
+
+#--------------------------------------------------------------------------
+
+sub numf {
+    my($handle, $num) = @_[0,1];
+    if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
+        $num += 0;  # Just use normal integer stringification.
+        # Specifically, don't let %G turn ten million into 1E+007
+    }
+    else {
+        $num = CORE::sprintf('%G', $num);
+        # "CORE::" is there to avoid confusion with the above sub sprintf.
+    }
+    while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1}  # right from perlfaq5
+    # The initial \d+ gobbles as many digits as it can, and then we
+    #  backtrack so it un-eats the rightmost three, and then we
+    #  insert the comma there.
+
+    $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
+    # This is just a lame hack instead of using Number::Format
+    return $num;
+}
+
+sub sprintf {
+    no integer;
+    my($handle, $format, @params) = @_;
+    return CORE::sprintf($format, @params);
+    # "CORE::" is there to avoid confusion with myself!
+}
+
+#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
+
+use integer; # vroom vroom... applies to the whole rest of the module
+
+sub language_tag {
+    my $it = ref($_[0]) || $_[0];
+    return undef unless $it =~ m/([^':]+)(?:::)?$/s;
+    $it = lc($1);
+    $it =~ tr<_><->;
+    return $it;
+}
+
+sub encoding {
+    my $it = $_[0];
+    return(
+        (ref($it) && $it->{'encoding'})
+        || 'iso-8859-1'   # Latin-1
+    );
+}
+
+#--------------------------------------------------------------------------
+
+sub fallback_languages { return('i-default', 'en', 'en-US') }
+
+sub fallback_language_classes { return () }
+
+#--------------------------------------------------------------------------
+
+sub fail_with { # an actual attribute method!
+    my($handle, @params) = @_;
+    return unless ref($handle);
+    $handle->{'fail'} = $params[0] if @params;
+    return $handle->{'fail'};
+}
+
+#--------------------------------------------------------------------------
+
+sub blacklist {
+    my ( $handle, @methods ) = @_;
+
+    unless ( defined $handle->{'blacklist'} ) {
+        no strict 'refs';
+
+        # Don't let people call methods they're not supposed to from maketext.
+        # Explicitly exclude all methods in this package that start with an
+        # underscore on principle.
+        $handle->{'blacklist'} = {
+            map { $_ => 1 } (
+                qw/
+                  blacklist
+                  encoding
+                  fail_with
+                  failure_handler_auto
+                  fallback_language_classes
+                  fallback_languages
+                  get_handle
+                  init
+                  language_tag
+                  maketext
+                  new
+                  whitelist
+                  /, grep { /^_/ } keys %{ __PACKAGE__ . "::" }
+            ),
+        };
+    }
+
+    if ( scalar @methods ) {
+        $handle->{'blacklist'} = { %{ $handle->{'blacklist'} }, map { $_ => 1 } @methods };
+    }
+
+    delete $handle->{'_external_lex_cache'};
+    return;
+}
+
+sub whitelist {
+    my ( $handle, @methods ) = @_;
+    if ( scalar @methods ) {
+        $handle->{'whitelist'} = {} unless defined $handle->{'whitelist'};
+        $handle->{'whitelist'} = { %{ $handle->{'whitelist'} }, map { $_ => 1 } @methods };
+    }
+
+    delete $handle->{'_external_lex_cache'};
+    return;
+}
+
+#--------------------------------------------------------------------------
+
+sub failure_handler_auto {
+    # Meant to be used like:
+    #  $handle->fail_with('failure_handler_auto')
+
+    my $handle = shift;
+    my $phrase = shift;
+
+    $handle->{'failure_lex'} ||= {};
+    my $lex = $handle->{'failure_lex'};
+
+    my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase));
+
+    # Dumbly copied from sub maketext:
+    return ${$value} if ref($value) eq 'SCALAR';
+    return $value    if ref($value) ne 'CODE';
+    {
+        local $SIG{'__DIE__'};
+        eval { $value = &$value($handle, @_) };
+    }
+    # If we make it here, there was an exception thrown in the
+    #  call to $value, and so scream:
+    if($@) {
+        # pretty up the error message
+        $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
+                 {\n in bracket code [compiled line $1],}s;
+        #$err =~ s/\n?$/\n/s;
+        Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
+        # Rather unexpected, but suppose that the sub tried calling
+        # a method that didn't exist.
+    }
+    else {
+        return $value;
+    }
+}
+
+#==========================================================================
+
+sub new {
+    # Nothing fancy!
+    my $class = ref($_[0]) || $_[0];
+    my $handle = bless {}, $class;
+    $handle->blacklist;
+    $handle->init;
+    return $handle;
+}
+
+sub init { return } # no-op
+
+###########################################################################
+
+sub maketext {
+    # Remember, this can fail.  Failure is controllable many ways.
+    Carp::croak 'maketext requires at least one parameter' unless @_ > 1;
+
+    my($handle, $phrase) = splice(@_,0,2);
+    Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase));
+
+    # backup $@ in case it's still being used in the calling code.
+    # If no failures, we'll re-set it back to what it was later.
+    my $at = $@;
+
+    # Copy @_ case one of its elements is $@.
+    @_ = @_;
+
+    # Look up the value:
+
+    my $value;
+    if (exists $handle->{'_external_lex_cache'}{$phrase}) {
+        DEBUG and warn "* Using external lex cache version of \"$phrase\"\n";
+        $value = $handle->{'_external_lex_cache'}{$phrase};
+    }
+    else {
+        foreach my $h_r (
+            @{  $isa_scan{ref($handle) || $handle} || $handle->_lex_refs  }
+        ) {
+            DEBUG and warn "* Looking up \"$phrase\" in $h_r\n";
+            if(exists $h_r->{$phrase}) {
+                DEBUG and warn "  Found \"$phrase\" in $h_r\n";
+                unless(ref($value = $h_r->{$phrase})) {
+                    # Nonref means it's not yet compiled.  Compile and replace.
+                    if ($handle->{'use_external_lex_cache'}) {
+                        $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value);
+                    }
+                    else {
+                        $value = $h_r->{$phrase} = $handle->_compile($value);
+                    }
+                }
+                last;
+            }
+            # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;"
+            # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;"
+            elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) {
+                # it's an auto lex, and this is an autoable key!
+                DEBUG and warn "  Automaking \"$phrase\" into $h_r\n";
+                if ($handle->{'use_external_lex_cache'}) {
+                    $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase);
+                }
+                else {
+                    $value = $h_r->{$phrase} = $handle->_compile($phrase);
+                }
+                last;
+            }
+            DEBUG>1 and print "  Not found in $h_r, nor automakable\n";
+            # else keep looking
+        }
+    }
+
+    unless(defined($value)) {
+        DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n";
+        if(ref($handle) and $handle->{'fail'}) {
+            DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n";
+            my $fail;
+            if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
+                $@ = $at; # Put $@ back in case we altered it along the way.
+                return &{$fail}($handle, $phrase, @_);
+                # If it ever returns, it should return a good value.
+            }
+            else { # It's a method name
+                $@ = $at; # Put $@ back in case we altered it along the way.
+                return $handle->$fail($phrase, @_);
+                # If it ever returns, it should return a good value.
+            }
+        }
+        else {
+            # All we know how to do is this;
+            Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
+        }
+    }
+
+    if(ref($value) eq 'SCALAR'){
+        $@ = $at; # Put $@ back in case we altered it along the way.
+        return $$value ;
+    }
+    if(ref($value) ne 'CODE'){
+        $@ = $at; # Put $@ back in case we altered it along the way.
+        return $value ;
+    }
+
+    {
+        local $SIG{'__DIE__'};
+        eval { $value = &$value($handle, @_) };
+    }
+    # If we make it here, there was an exception thrown in the
+    #  call to $value, and so scream:
+    if ($@) {
+        # pretty up the error message
+        $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
+                 {\n in bracket code [compiled line $1],}s;
+        #$err =~ s/\n?$/\n/s;
+        Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
+        # Rather unexpected, but suppose that the sub tried calling
+        # a method that didn't exist.
+    }
+    else {
+        $@ = $at; # Put $@ back in case we altered it along the way.
+        return $value;
+    }
+    $@ = $at; # Put $@ back in case we altered it along the way.
+}
+
+###########################################################################
+
+sub get_handle {  # This is a constructor and, yes, it CAN FAIL.
+    # Its class argument has to be the base class for the current
+    # application's l10n files.
+
+    my($base_class, @languages) = @_;
+    $base_class = ref($base_class) || $base_class;
+    # Complain if they use __PACKAGE__ as a project base class?
+
+    if( @languages ) {
+        DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+        if($USING_LANGUAGE_TAGS) {   # An explicit language-list was given!
+            @languages =
+            map {; $_, I18N::LangTags::alternate_language_tags($_) }
+            # Catch alternation
+            map I18N::LangTags::locale2language_tag($_),
+            # If it's a lg tag, fine, pass thru (untainted)
+            # If it's a locale ID, try converting to a lg tag (untainted),
+            # otherwise nix it.
+            @languages;
+            DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+        }
+    }
+    else {
+        @languages = $base_class->_ambient_langprefs;
+    }
+
+    @languages = $base_class->_langtag_munging(@languages);
+
+    my %seen;
+    foreach my $module_name ( map { $base_class . '::' . $_ }  @languages ) {
+        next unless length $module_name; # sanity
+        next if $seen{$module_name}++        # Already been here, and it was no-go
+        || !&_try_use($module_name); # Try to use() it, but can't it.
+        return($module_name->new); # Make it!
+    }
+
+    return undef; # Fail!
+}
+
+###########################################################################
+
+sub _langtag_munging {
+    my($base_class, @languages) = @_;
+
+    # We have all these DEBUG statements because otherwise it's hard as hell
+    # to diagnose if/when something goes wrong.
+
+    DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n";
+
+    if($USING_LANGUAGE_TAGS) {
+        DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+        @languages     = $base_class->_add_supers( @languages );
+
+        push @languages, I18N::LangTags::panic_languages(@languages);
+        DEBUG and warn "After adding panic languages:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+        push @languages, $base_class->fallback_languages;
+        # You are free to override fallback_languages to return empty-list!
+        DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+        @languages =  # final bit of processing to turn them into classname things
+        map {
+            my $it = $_;  # copy
+            $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
+            $it =~ tr<_a-z0-9><>cd;  # remove all but a-z0-9_
+            $it;
+        } @languages
+        ;
+        DEBUG and warn "Nearing end of munging:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+    }
+    else {
+        DEBUG and warn "Bypassing language-tags.\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+    }
+
+    DEBUG and warn "Before adding fallback classes:\n",
+    ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+    push @languages, $base_class->fallback_language_classes;
+    # You are free to override that to return whatever.
+
+    DEBUG and warn "Finally:\n",
+    ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+    return @languages;
+}
+
+###########################################################################
+
+sub _ambient_langprefs {
+    return  I18N::LangTags::Detect::detect();
+}
+
+###########################################################################
+
+sub _add_supers {
+    my($base_class, @languages) = @_;
+
+    if (!$MATCH_SUPERS) {
+        # Nothing
+        DEBUG and warn "Bypassing any super-matching.\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+    }
+    elsif( $MATCH_SUPERS_TIGHTLY ) {
+        DEBUG and warn "Before adding new supers tightly:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+        @languages = I18N::LangTags::implicate_supers( @languages );
+        DEBUG and warn "After adding new supers tightly:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+
+    }
+    else {
+        DEBUG and warn "Before adding supers to end:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+        @languages = I18N::LangTags::implicate_supers_strictly( @languages );
+        DEBUG and warn "After adding supers to end:\n",
+        ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
+    }
+
+    return @languages;
+}
+
+###########################################################################
+#
+# This is where most people should stop reading.
+#
+###########################################################################
+
+my %tried = ();
+# memoization of whether we've used this module, or found it unusable.
+
+sub _try_use {   # Basically a wrapper around "require Modulename"
+    # "Many men have tried..."  "They tried and failed?"  "They tried and died."
+    return $tried{$_[0]} if exists $tried{$_[0]};  # memoization
+
+    my $module = $_[0];   # ASSUME sane module name!
+    { no strict 'refs';
+        no warnings 'once';
+        return($tried{$module} = 1)
+        if %{$module . '::Lexicon'} or @{$module . '::ISA'};
+        # weird case: we never use'd it, but there it is!
+    }
+
+    DEBUG and warn " About to use $module ...\n";
+
+    local $SIG{'__DIE__'};
+    local $@;
+    local @INC = @INC;
+    pop @INC if $INC[-1] eq '.';
+    eval "require $module"; # used to be "use $module", but no point in that.
+
+    if($@) {
+        DEBUG and warn "Error using $module \: $@\n";
+        return $tried{$module} = 0;
+    }
+    else {
+        DEBUG and warn " OK, $module is used\n";
+        return $tried{$module} = 1;
+    }
+}
+
+#--------------------------------------------------------------------------
+
+sub _lex_refs {  # report the lexicon references for this handle's class
+    # returns an arrayREF!
+    no strict 'refs';
+    no warnings 'once';
+    my $class = ref($_[0]) || $_[0];
+    DEBUG and warn "Lex refs lookup on $class\n";
+    return $isa_scan{$class} if exists $isa_scan{$class};  # memoization!
+
+    my @lex_refs;
+    my $seen_r = ref($_[1]) ? $_[1] : {};
+
+    if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
+        push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
+        DEBUG and warn '%' . $class . '::Lexicon contains ',
+            scalar(keys %{$class . '::Lexicon'}), " entries\n";
+    }
+
+    # Implements depth(height?)-first recursive searching of superclasses.
+    # In hindsight, I suppose I could have just used Class::ISA!
+    foreach my $superclass (@{$class . '::ISA'}) {
+        DEBUG and warn " Super-class search into $superclass\n";
+        next if $seen_r->{$superclass}++;
+        push @lex_refs, @{&_lex_refs($superclass, $seen_r)};  # call myself
+    }
+
+    $isa_scan{$class} = \@lex_refs; # save for next time
+    return \@lex_refs;
+}
+
+sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
+
+#--------------------------------------------------------------------------
+
+sub _compile {
+    # This big scary routine compiles an entry.
+    # It returns either a coderef if there's brackety bits in this, or
+    #  otherwise a ref to a scalar.
+
+    my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344
+
+    # The while() regex is more expensive than this check on strings that don't need a compile.
+    # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
+    # on strings that don't need compiling.
+    return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
+
+    my $handle = $_[0];
+
+    my(@code);
+    my(@c) = (''); # "chunks" -- scratch.
+    my $call_count = 0;
+    my $big_pile = '';
+    {
+        my $in_group = 0; # start out outside a group
+        my($m, @params); # scratch
+
+        while($string_to_compile =~  # Iterate over chunks.
+            m/(
+                [^\~\[\]]+  # non-~[] stuff (Capture everything else here)
+                |
+                ~.       # ~[, ~], ~~, ~other
+                |
+                \[          # [ presumably opening a group
+                |
+                \]          # ] presumably closing a group
+                |
+                ~           # terminal ~ ?
+                |
+                $
+            )/xgs
+        ) {
+            DEBUG>2 and warn qq{  "$1"\n};
+
+            if($1 eq '[' or $1 eq '') {       # "[" or end
+                # Whether this is "[" or end, force processing of any
+                #  preceding literal.
+                if($in_group) {
+                    if($1 eq '') {
+                        $handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
+                    }
+                    else {
+                        $handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
+                    }
+                }
+                else {
+                    if ($1 eq '') {
+                        DEBUG>2 and warn "   [end-string]\n";
+                    }
+                    else {
+                        $in_group = 1;
+                    }
+                    die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
+                    if(length $c[-1]) {
+                        # Now actually processing the preceding literal
+                        $big_pile .= $c[-1];
+                        if($USE_LITERALS and (
+                                (ord('A') == 65)
+                                ? $c[-1] !~ m/[^\x20-\x7E]/s
+                                # ASCII very safe chars
+                                : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
+                                # EBCDIC very safe chars
+                            )) {
+                            # normal case -- all very safe chars
+                            $c[-1] =~ s/'/\\'/g;
+                            push @code, q{ '} . $c[-1] . "',\n";
+                            $c[-1] = ''; # reuse this slot
+                        }
+                        else {
+                            $c[-1] =~ s/\\\\/\\/g;
+                            push @code, ' $c[' . $#c . "],\n";
+                            push @c, ''; # new chunk
+                        }
+                    }
+                    # else just ignore the empty string.
+                }
+
+            }
+            elsif($1 eq ']') {  # "]"
+                # close group -- go back in-band
+                if($in_group) {
+                    $in_group = 0;
+
+                    DEBUG>2 and warn "   --Closing group [$c[-1]]\n";
+
+                    # And now process the group...
+
+                    if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
+                        DEBUG>2 and warn "   -- (Ignoring)\n";
+                        $c[-1] = ''; # reset out chink
+                        next;
+                    }
+
+                    #$c[-1] =~ s/^\s+//s;
+                    #$c[-1] =~ s/\s+$//s;
+                    ($m,@params) = split(/,/, $c[-1], -1);  # was /\s*,\s*/
+
+                    # A bit of a hack -- we've turned "~,"'s into DELs, so turn
+                    #  'em into real commas here.
+                    if (ord('A') == 65) { # ASCII, etc
+                        foreach($m, @params) { tr/\x7F/,/ }
+                    }
+                    else {              # EBCDIC (1047, 0037, POSIX-BC)
+                        # Thanks to Peter Prymmer for the EBCDIC handling
+                        foreach($m, @params) { tr/\x07/,/ }
+                    }
+
+                    # Special-case handling of some method names:
+                    if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
+                        # Treat [_1,...] as [,_1,...], etc.
+                        unshift @params, $m;
+                        $m = '';
+                    }
+                    elsif($m eq '*') {
+                        $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
+                    }
+                    elsif($m eq '#') {
+                        $m = 'numf';  # "#" for "number": [#,_1] for "the number _1"
+                    }
+
+                    # Most common case: a simple, legal-looking method name
+                    if($m eq '') {
+                        # 0-length method name means to just interpolate:
+                        push @code, ' (';
+                    }
+                    elsif($m =~ /^\w+$/s
+                        && !$handle->{'blacklist'}{$m}
+                        && ( !defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m} )
+                        # exclude anything fancy and restrict to the whitelist/blacklist.
+                    ) {
+                        push @code, ' $_[0]->' . $m . '(';
+                    }
+                    else {
+                        # TODO: implement something?  or just too icky to consider?
+                        $handle->_die_pointing(
+                            $string_to_compile,
+                            "Can't use \"$m\" as a method name in bracket group",
+                            2 + length($c[-1])
+                        );
+                    }
+
+                    pop @c; # we don't need that chunk anymore
+                    ++$call_count;
+
+                    foreach my $p (@params) {
+                        if($p eq '_*') {
+                            # Meaning: all parameters except $_[0]
+                            $code[-1] .= ' @_[1 .. $#_], ';
+                            # and yes, that does the right thing for all @_ < 3
+                        }
+                        elsif($p =~ m/^_(-?\d+)$/s) {
+                            # _3 meaning $_[3]
+                            $code[-1] .= '$_[' . (0 + $1) . '], ';
+                        }
+                        elsif($USE_LITERALS and (
+                                (ord('A') == 65)
+                                ? $p !~ m/[^\x20-\x7E]/s
+                                # ASCII very safe chars
+                                : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
+                                # EBCDIC very safe chars
+                            )) {
+                            # Normal case: a literal containing only safe characters
+                            $p =~ s/'/\\'/g;
+                            $code[-1] .= q{'} . $p . q{', };
+                        }
+                        else {
+                            # Stow it on the chunk-stack, and just refer to that.
+                            push @c, $p;
+                            push @code, ' $c[' . $#c . '], ';
+                        }
+                    }
+                    $code[-1] .= "),\n";
+
+                    push @c, '';
+                }
+                else {
+                    $handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
+                }
+
+            }
+            elsif(substr($1,0,1) ne '~') {
+                # it's stuff not containing "~" or "[" or "]"
+                # i.e., a literal blob
+                my $text = $1;
+                $text =~ s/\\/\\\\/g;
+                $c[-1] .= $text;
+
+            }
+            elsif($1 eq '~~') { # "~~"
+                $c[-1] .= '~';
+
+            }
+            elsif($1 eq '~[') { # "~["
+                $c[-1] .= '[';
+
+            }
+            elsif($1 eq '~]') { # "~]"
+                $c[-1] .= ']';
+
+            }
+            elsif($1 eq '~,') { # "~,"
+                if($in_group) {
+                    # This is a hack, based on the assumption that no-one will actually
+                    # want a DEL inside a bracket group.  Let's hope that's it's true.
+                    if (ord('A') == 65) { # ASCII etc
+                        $c[-1] .= "\x7F";
+                    }
+                    else {              # EBCDIC (cp 1047, 0037, POSIX-BC)
+                        $c[-1] .= "\x07";
+                    }
+                }
+                else {
+                    $c[-1] .= '~,';
+                }
+
+            }
+            elsif($1 eq '~') { # possible only at string-end, it seems.
+                $c[-1] .= '~';
+
+            }
+            else {
+                # It's a "~X" where X is not a special character.
+                # Consider it a literal ~ and X.
+                my $text = $1;
+                $text =~ s/\\/\\\\/g;
+                $c[-1] .= $text;
+            }
+        }
+    }
+
+    if($call_count) {
+        undef $big_pile; # Well, nevermind that.
+    }
+    else {
+        # It's all literals!  Ahwell, that can happen.
+        # So don't bother with the eval.  Return a SCALAR reference.
+        return \$big_pile;
+    }
+
+    die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
+    DEBUG and warn scalar(@c), " chunks under closure\n";
+    if(@code == 0) { # not possible?
+        DEBUG and warn "Empty code\n";
+        return \'';
+    }
+    elsif(@code > 1) { # most cases, presumably!
+        unshift @code, "join '',\n";
+    }
+    unshift @code, "use strict; sub {\n";
+    push @code, "}\n";
+
+    DEBUG and warn @code;
+    my $sub = eval(join '', @code);
+    die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
+    return $sub;
+}
+
+#--------------------------------------------------------------------------
+
+sub _die_pointing {
+    # This is used by _compile to throw a fatal error
+    my $target = shift;
+    $target = ref($target) || $target; # class name
+                                       # ...leaving $_[0] the error-causing text, and $_[1] the error message
+
+    my $i = index($_[0], "\n");
+
+    my $pointy;
+    my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
+    if($pos < 1) {
+        $pointy = "^=== near there\n";
+    }
+    else { # we need to space over
+        my $first_tab = index($_[0], "\t");
+        if($pos > 2 and ( -1 == $first_tab  or  $first_tab > pos($_[0]))) {
+            # No tabs, or the first tab is harmlessly after where we will point to,
+            # AND we're far enough from the margin that we can draw a proper arrow.
+            $pointy = ('=' x $pos) . "^ near there\n";
+        }
+        else {
+            # tabs screw everything up!
+            $pointy = substr($_[0],0,$pos);
+            $pointy =~ tr/\t //cd;
+            # make everything into whitespace, but preserving tabs
+            $pointy .= "^=== near there\n";
+        }
+    }
+
+    my $errmsg = "$_[1], in\:\n$_[0]";
+
+    if($i == -1) {
+        # No newline.
+        $errmsg .= "\n" . $pointy;
+    }
+    elsif($i == (length($_[0]) - 1)  ) {
+        # Already has a newline at end.
+        $errmsg .= $pointy;
+    }
+    else {
+        # don't bother with the pointy bit, I guess.
+    }
+    Carp::croak( "$errmsg via $target, as used" );
+}
+
+1;
--- a/database/perl/lib/Locale/Maketext.pod
+++ b/database/perl/lib/Locale/Maketext.pod
--- a/database/perl/lib/Locale/Maketext/Cookbook.pod
+++ b/database/perl/lib/Locale/Maketext/Cookbook.pod
@@ -0,0 +1,150 @@
+# This document contains text in Perl "POD" format.
+# Use a POD viewer like perldoc or perlman to render it.
+
+=encoding utf-8
+
+=head1 NAME
+
+Locale::Maketext::Cookbook - recipes for using Locale::Maketext
+
+=head1 INTRODUCTION
+
+This is a work in progress. Not much progress by now :-)
+
+=head1 ONESIDED LEXICONS
+
+I<Adapted from a suggestion by Dan Muey>
+
+It may be common (for example at your main lexicon) that
+the hash keys and values coincide. Like that
+
+    q{Hello, tell me your name} 
+      => q{Hello, tell me your name}
+
+It would be nice to just write:
+
+    q{Hello, tell me your name} => ''
+
+and have this magically inflated to the first form.
+Among the advantages of such representation, that would
+lead to  
+smaller files, less prone to mistyping or mispasting, 
+and handy to someone translating it which can simply 
+copy the main lexicon and enter the translation 
+instead of having to remove the value first.
+
+That can be achieved by overriding C<init>
+in your class and working on the main lexicon
+with code like that:
+
+    package My::I18N;
+    ...
+
+    sub init {
+        my $lh = shift; # a newborn handle
+        $lh->SUPER::init();
+        inflate_lexicon(\%My::I18N::en::Lexicon);
+        return;
+    }
+
+    sub inflate_lexicon {
+        my $lex = shift;
+        while (my ($k, $v) = each %$lex) {
+            $v = $k if !defined $v || $v eq '';
+        }
+    }
+
+Here we are assuming C<My::I18N::en> to own the
+main lexicon.
+
+There are some downsides here: the size economy
+will not stand at runtime after this C<init()>
+runs. But it should not be that critical, since
+if you don't have space for that, you won't have
+space for any other language besides the main one
+as well. You could do that too with ties,
+expanding the value at lookup time which
+should be more time expensive as an option.
+
+=head1 DECIMAL PLACES IN NUMBER FORMATTING
+
+I<After CPAN RT #36136 (L<https://rt.cpan.org/Ticket/Display.html?id=36136>)>
+
+The documentation of L<Locale::Maketext> advises that 
+the standard bracket method C<numf> is limited and that
+you must override that for better results. It even
+suggests the use of L<Number::Format>.
+
+One such defect of standard C<numf> is to not be
+able to use a certain decimal precision.
+For example, 
+
+    $lh->maketext('pi is [numf,_1]', 355/113);
+
+outputs
+
+    pi is 3.14159292035398 
+
+Since pi ≈ 355/116 is only accurate 
+to 6 decimal places, you would want to say:
+
+    $lh->maketext('pi is [numf,_1,6]', 355/113); 
+
+and get "pi is 3.141592".
+
+One solution for that could use C<Number::Format>
+like that:
+
+    package Wuu;
+
+    use base qw(Locale::Maketext);
+
+    use Number::Format;
+
+    # can be overridden according to language conventions
+    sub _numf_params {
+        return (
+            -thousands_sep  => '.',
+            -decimal_point  => ',',
+            -decimal_digits => 2,
+        );
+    }
+
+    # builds a Number::Format
+    sub _numf_formatter {
+        my ($lh, $scale) = @_;
+        my @params = $lh->_numf_params;
+        if ($scale) { # use explicit scale rather than default
+            push @params, (-decimal_digits => $scale);
+        }
+        return Number::Format->new(@params);
+    }
+
+    sub numf {
+        my ($lh, $n, $scale) = @_;
+        # get the (cached) formatter
+        my $nf = $lh->{__nf}{$scale} ||= $lh->_numf_formatter($scale);
+        # format the number itself
+        return $nf->format_number($n);
+    }
+
+    package Wuu::pt;
+
+    use base qw(Wuu);
+
+and then 
+
+    my $lh = Wuu->get_handle('pt');
+    $lh->maketext('A [numf,_1,3] km de distância', 1550.2222);
+
+would return "A 1.550,222 km de distância".
+
+Notice that the standard utility methods of 
+C<Locale::Maketext> are irremediably limited
+because they could not aim to do everything
+that could be expected from them in different languages,
+cultures and applications. So extending C<numf>,
+C<quant>, and C<sprintf> is natural as soon
+as your needs exceed what the standard ones do.
+
+
--- a/database/perl/lib/Locale/Maketext/Guts.pm
+++ b/database/perl/lib/Locale/Maketext/Guts.pm
@@ -0,0 +1,24 @@
+package Locale::Maketext::Guts;
+
+use Locale::Maketext;
+
+our $VERSION = '1.20';
+
+=head1 NAME
+
+Locale::Maketext::Guts - Deprecated module to load Locale::Maketext utf8 code
+
+=head1 SYNOPSIS
+
+  # Do this instead please
+  use Locale::Maketext
+
+=head1 DESCRIPTION
+
+Previously Local::Maketext::GutsLoader performed some magic to load
+Locale::Maketext when utf8 was unavailable. The subs this module provided
+were merged back into Locale::Maketext
+
+=cut
+
+1;
--- a/database/perl/lib/Locale/Maketext/GutsLoader.pm
+++ b/database/perl/lib/Locale/Maketext/GutsLoader.pm
@@ -0,0 +1,26 @@
+package Locale::Maketext::GutsLoader;
+
+use Locale::Maketext;
+
+our $VERSION = '1.20';
+
+sub zorp { return scalar @_ }
+
+=head1 NAME
+
+Locale::Maketext::GutsLoader - Deprecated module to load Locale::Maketext utf8 code
+
+=head1 SYNOPSIS
+
+  # Do this instead please
+  use Locale::Maketext
+
+=head1 DESCRIPTION
+
+Previously Locale::Maketext::Guts performed some magic to load
+Locale::Maketext when utf8 was unavailable. The subs this module provided
+were merged back into Locale::Maketext.
+
+=cut
+
+1;
--- a/database/perl/lib/Locale/Maketext/Simple.pm
+++ b/database/perl/lib/Locale/Maketext/Simple.pm
@@ -0,0 +1,346 @@
+package Locale::Maketext::Simple;
+$Locale::Maketext::Simple::VERSION = '0.21_01';
+
+use strict;
+use 5.005;
+
+=head1 NAME
+
+Locale::Maketext::Simple - Simple interface to Locale::Maketext::Lexicon
+
+=head1 VERSION
+
+This document describes version 0.18 of Locale::Maketext::Simple,
+released Septermber 8, 2006.
+
+=head1 SYNOPSIS
+
+Minimal setup (looks for F<auto/Foo/*.po> and F<auto/Foo/*.mo>):
+
+    package Foo;
+    use Locale::Maketext::Simple;	# exports 'loc'
+    loc_lang('fr');			# set language to French
+    sub hello {
+	print loc("Hello, [_1]!", "World");
+    }
+
+More sophisticated example:
+
+    package Foo::Bar;
+    use Locale::Maketext::Simple (
+	Class	    => 'Foo',	    # search in auto/Foo/
+	Style	    => 'gettext',   # %1 instead of [_1]
+	Export	    => 'maketext',  # maketext() instead of loc()
+	Subclass    => 'L10N',	    # Foo::L10N instead of Foo::I18N
+	Decode	    => 1,	    # decode entries to unicode-strings
+	Encoding    => 'locale',    # but encode lexicons in current locale
+				    # (needs Locale::Maketext::Lexicon 0.36)
+    );
+    sub japh {
+	print maketext("Just another %1 hacker", "Perl");
+    }
+
+=head1 DESCRIPTION
+
+This module is a simple wrapper around B<Locale::Maketext::Lexicon>,
+designed to alleviate the need of creating I<Language Classes> for
+module authors.
+
+The language used is chosen from the loc_lang call. If a lookup is not
+possible, the i-default language will be used. If the lookup is not in the
+i-default language, then the key will be returned.
+
+If B<Locale::Maketext::Lexicon> is not present, it implements a
+minimal localization function by simply interpolating C<[_1]> with
+the first argument, C<[_2]> with the second, etc.  Interpolated
+function like C<[quant,_1]> are treated as C<[_1]>, with the sole
+exception of C<[tense,_1,X]>, which will append C<ing> to C<_1> when
+X is C<present>, or appending C<ed> to <_1> otherwise.
+
+=head1 OPTIONS
+
+All options are passed either via the C<use> statement, or via an
+explicit C<import>.
+
+=head2 Class
+
+By default, B<Locale::Maketext::Simple> draws its source from the
+calling package's F<auto/> directory; you can override this behaviour
+by explicitly specifying another package as C<Class>.
+
+=head2 Path
+
+If your PO and MO files are under a path elsewhere than C<auto/>,
+you may specify it using the C<Path> option.
+
+=head2 Style
+
+By default, this module uses the C<maketext> style of C<[_1]> and
+C<[quant,_1]> for interpolation.  Alternatively, you can specify the
+C<gettext> style, which uses C<%1> and C<%quant(%1)> for interpolation.
+
+This option is case-insensitive.
+
+=head2 Export
+
+By default, this module exports a single function, C<loc>, into its
+caller's namespace.  You can set it to another name, or set it to
+an empty string to disable exporting.
+
+=head2 Subclass
+
+By default, this module creates an C<::I18N> subclass under the
+caller's package (or the package specified by C<Class>), and stores
+lexicon data in its subclasses.  You can assign a name other than
+C<I18N> via this option.
+
+=head2 Decode
+
+If set to a true value, source entries will be converted into
+utf8-strings (available in Perl 5.6.1 or later).  This feature
+needs the B<Encode> or B<Encode::compat> module.
+
+=head2 Encoding
+
+Specifies an encoding to store lexicon entries, instead of
+utf8-strings.  If set to C<locale>, the encoding from the current
+locale setting is used.  Implies a true value for C<Decode>.
+
+=cut
+
+sub import {
+    my ($class, %args) = @_;
+
+    $args{Class}    ||= caller;
+    $args{Style}    ||= 'maketext';
+    $args{Export}   ||= 'loc';
+    $args{Subclass} ||= 'I18N';
+
+    my ($loc, $loc_lang) = $class->load_loc(%args);
+    $loc ||= $class->default_loc(%args);
+
+    no strict 'refs';
+    *{caller(0) . "::$args{Export}"} = $loc if $args{Export};
+    *{caller(0) . "::$args{Export}_lang"} = $loc_lang || sub { 1 };
+}
+
+my %Loc;
+
+sub reload_loc { %Loc = () }
+
+sub load_loc {
+    my ($class, %args) = @_;
+
+    my $pkg = join('::', grep { defined and length } $args{Class}, $args{Subclass});
+    return $Loc{$pkg} if exists $Loc{$pkg};
+
+    eval {
+        local @INC = @INC;
+        pop @INC if $INC[-1] eq '.';
+        require Locale::Maketext::Lexicon;
+        1
+    } or return;
+    $Locale::Maketext::Lexicon::VERSION > 0.20	    or return;
+    eval { require File::Spec; 1 }		    or return;
+
+    my $path = $args{Path} || $class->auto_path($args{Class}) or return;
+    my $pattern = File::Spec->catfile($path, '*.[pm]o');
+    my $decode = $args{Decode} || 0;
+    my $encoding = $args{Encoding} || undef;
+
+    $decode = 1 if $encoding;
+
+    $pattern =~ s{\\}{/}g; # to counter win32 paths
+
+    eval "
+	package $pkg;
+	use base 'Locale::Maketext';
+	Locale::Maketext::Lexicon->import({
+	    'i-default' => [ 'Auto' ],
+	    '*'	=> [ Gettext => \$pattern ],
+	    _decode => \$decode,
+	    _encoding => \$encoding,
+	});
+	*${pkg}::Lexicon = \\%${pkg}::i_default::Lexicon;
+	*tense = sub { \$_[1] . ((\$_[2] eq 'present') ? 'ing' : 'ed') }
+	    unless defined &tense;
+
+	1;
+    " or die $@;
+
+    my $lh = eval { $pkg->get_handle } or return;
+    my $style = lc($args{Style});
+    if ($style eq 'maketext') {
+	$Loc{$pkg} = sub {
+	    $lh->maketext(@_)
+	};
+    }
+    elsif ($style eq 'gettext') {
+	$Loc{$pkg} = sub {
+	    my $str = shift;
+            $str =~ s{([\~\[\]])}{~$1}g;
+            $str =~ s{
+                ([%\\]%)                        # 1 - escaped sequence
+            |
+                %   (?:
+                        ([A-Za-z#*]\w*)         # 2 - function call
+                            \(([^\)]*)\)        # 3 - arguments
+                    |
+                        ([1-9]\d*|\*)           # 4 - variable
+                    )
+            }{
+                $1 ? $1
+                   : $2 ? "\[$2,"._unescape($3)."]"
+                        : "[_$4]"
+            }egx;
+	    return $lh->maketext($str, @_);
+	};
+    }
+    else {
+	die "Unknown Style: $style";
+    }
+
+    return $Loc{$pkg}, sub {
+	$lh = $pkg->get_handle(@_);
+    };
+}
+
+sub default_loc {
+    my ($self, %args) = @_;
+    my $style = lc($args{Style});
+    if ($style eq 'maketext') {
+	return sub {
+	    my $str = shift;
+            $str =~ s{((?<!~)(?:~~)*)\[_([1-9]\d*|\*)\]}
+                     {$1%$2}g;
+            $str =~ s{((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]}
+                     {"$1%$2(" . _escape($3) . ')'}eg;
+	    _default_gettext($str, @_);
+	};
+    }
+    elsif ($style eq 'gettext') {
+	return \&_default_gettext;
+    }
+    else {
+	die "Unknown Style: $style";
+    }
+}
+
+sub _default_gettext {
+    my $str = shift;
+    $str =~ s{
+	%			# leading symbol
+	(?:			# either one of
+	    \d+			#   a digit, like %1
+	    |			#     or
+	    (\w+)\(		#   a function call -- 1
+		(?:		#     either
+		    %\d+	#	an interpolation
+		    |		#     or
+		    ([^,]*)	#	some string -- 2
+		)		#     end either
+		(?:		#     maybe followed
+		    ,		#       by a comma
+		    ([^),]*)	#       and a param -- 3
+		)?		#     end maybe
+		(?:		#     maybe followed
+		    ,		#       by another comma
+		    ([^),]*)	#       and a param -- 4
+		)?		#     end maybe
+		[^)]*		#     and other ignorable params
+	    \)			#   closing function call
+	)			# closing either one of
+    }{
+	my $digit = $2 || shift;
+	$digit . (
+	    $1 ? (
+		($1 eq 'tense') ? (($3 eq 'present') ? 'ing' : 'ed') :
+		($1 eq 'quant') ? ' ' . (($digit > 1) ? ($4 || "$3s") : $3) :
+		''
+	    ) : ''
+	);
+    }egx;
+    return $str;
+};
+
+sub _escape {
+    my $text = shift;
+    $text =~ s/\b_([1-9]\d*)/%$1/g;
+    return $text;
+}
+
+sub _unescape {
+    join(',', map {
+        /\A(\s*)%([1-9]\d*|\*)(\s*)\z/ ? "$1_$2$3" : $_
+    } split(/,/, $_[0]));
+}
+
+sub auto_path {
+    my ($self, $calldir) = @_;
+    $calldir =~ s#::#/#g;
+    my $path = $INC{$calldir . '.pm'} or return;
+
+    # Try absolute path name.
+    if ($^O eq 'MacOS') {
+	(my $malldir = $calldir) =~ tr#/#:#;
+	$path =~ s#^(.*)$malldir\.pm\z#$1auto:$malldir:#s;
+    } else {
+	$path =~ s#^(.*)$calldir\.pm\z#$1auto/$calldir/#;
+    }
+
+    return $path if -d $path;
+
+    # If that failed, try relative path with normal @INC searching.
+    $path = "auto/$calldir/";
+    foreach my $inc (@INC) {
+	return "$inc/$path" if -d "$inc/$path";
+    }
+
+    return;
+}
+
+1;
+
+=head1 ACKNOWLEDGMENTS
+
+Thanks to Jos I. Boumans for suggesting this module to be written.
+
+Thanks to Chia-Liang Kao for suggesting C<Path> and C<loc_lang>.
+
+=head1 SEE ALSO
+
+L<Locale::Maketext>, L<Locale::Maketext::Lexicon>
+
+=head1 AUTHORS
+
+Audrey Tang E<lt>cpan@audreyt.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2003, 2004, 2005, 2006 by Audrey Tang E<lt>cpan@audreyt.orgE<gt>.
+
+This software is released under the MIT license cited below.  Additionally,
+when this software is distributed with B<Perl Kit, Version 5>, you may also
+redistribute it and/or modify it under the same terms as Perl itself.
+
+=head2 The "MIT" License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+=cut
--- a/database/perl/lib/Locale/Maketext/TPJ13.pod
+++ b/database/perl/lib/Locale/Maketext/TPJ13.pod
@@ -0,0 +1,775 @@
+# This document contains text in Perl "POD" format.
+# Use a POD viewer like perldoc or perlman to render it.
+
+=head1 NAME
+
+Locale::Maketext::TPJ13 -- article about software localization
+
+=head1 SYNOPSIS
+
+  # This an article, not a module.
+
+=head1 DESCRIPTION
+
+The following article by Sean M. Burke and Jordan Lachler
+first appeared in I<The Perl Journal> #13
+and is copyright 1999 The Perl Journal. It appears
+courtesy of Jon Orwant and The Perl Journal.  This document may be
+distributed under the same terms as Perl itself.
+
+=head1 Localization and Perl: gettext breaks, Maketext fixes
+
+by Sean M. Burke and Jordan Lachler
+
+This article points out cases where gettext (a common system for
+localizing software interfaces -- i.e., making them work in the user's
+language of choice) fails because of basic differences between human
+languages.  This article then describes Maketext, a new system capable
+of correctly treating these differences.
+
+=head2 A Localization Horror Story: It Could Happen To You
+
+=over
+
+"There are a number of languages spoken by human beings in this
+world."
+
+-- Harald Tveit Alvestrand, in RFC 1766, "Tags for the
+Identification of Languages"
+
+=back
+
+Imagine that your task for the day is to localize a piece of software
+-- and luckily for you, the only output the program emits is two
+messages, like this:
+
+  I scanned 12 directories.
+
+  Your query matched 10 files in 4 directories.
+
+So how hard could that be?  You look at the code that
+produces the first item, and it reads:
+
+  printf("I scanned %g directories.",
+         $directory_count);
+
+You think about that, and realize that it doesn't even work right for
+English, as it can produce this output:
+
+  I scanned 1 directories.
+
+So you rewrite it to read:
+
+  printf("I scanned %g %s.",
+         $directory_count,
+         $directory_count == 1 ?
+           "directory" : "directories",
+  );
+
+...which does the Right Thing.  (In case you don't recall, "%g" is for
+locale-specific number interpolation, and "%s" is for string
+interpolation.)
+
+But you still have to localize it for all the languages you're
+producing this software for, so you pull Locale::gettext off of CPAN
+so you can access the C<gettext> C functions you've heard are standard
+for localization tasks.
+
+And you write:
+
+  printf(gettext("I scanned %g %s."),
+         $dir_scan_count,
+         $dir_scan_count == 1 ?
+           gettext("directory") : gettext("directories"),
+  );
+
+But you then read in the gettext manual (Drepper, Miller, and Pinard 1995)
+that this is not a good idea, since how a single word like "directory"
+or "directories" is translated may depend on context -- and this is
+true, since in a case language like German or Russian, you'd may need
+these words with a different case ending in the first instance (where the
+word is the object of a verb) than in the second instance, which you haven't even
+gotten to yet (where the word is the object of a preposition, "in %g
+directories") -- assuming these keep the same syntax when translated
+into those languages.
+
+So, on the advice of the gettext manual, you rewrite:
+
+  printf( $dir_scan_count == 1 ?
+           gettext("I scanned %g directory.") :
+           gettext("I scanned %g directories."),
+         $dir_scan_count );
+
+So, you email your various translators (the boss decides that the
+languages du jour are Chinese, Arabic, Russian, and Italian, so you
+have one translator for each), asking for translations for "I scanned
+%g directory." and "I scanned %g directories.".  When they reply,
+you'll put that in the lexicons for gettext to use when it localizes
+your software, so that when the user is running under the "zh"
+(Chinese) locale, gettext("I scanned %g directory.") will return the
+appropriate Chinese text, with a "%g" in there where printf can then
+interpolate $dir_scan.
+
+Your Chinese translator emails right back -- he says both of these
+phrases translate to the same thing in Chinese, because, in linguistic
+jargon, Chinese "doesn't have number as a grammatical category" --
+whereas English does.  That is, English has grammatical rules that
+refer to "number", i.e., whether something is grammatically singular
+or plural; and one of these rules is the one that forces nouns to take
+a plural suffix (generally "s") when in a plural context, as they are when
+they follow a number other than "one" (including, oddly enough, "zero").
+Chinese has no such rules, and so has just the one phrase where English
+has two.  But, no problem, you can have this one Chinese phrase appear
+as the translation for the two English phrases in the "zh" gettext
+lexicon for your program.
+
+Emboldened by this, you dive into the second phrase that your software
+needs to output: "Your query matched 10 files in 4 directories.".  You notice
+that if you want to treat phrases as indivisible, as the gettext
+manual wisely advises, you need four cases now, instead of two, to
+cover the permutations of singular and plural on the two items,
+$dir_count and $file_count.  So you try this:
+
+  printf( $file_count == 1 ?
+    ( $directory_count == 1 ?
+     gettext("Your query matched %g file in %g directory.") :
+     gettext("Your query matched %g file in %g directories.") ) :
+    ( $directory_count == 1 ?
+     gettext("Your query matched %g files in %g directory.") :
+     gettext("Your query matched %g files in %g directories.") ),
+   $file_count, $directory_count,
+  );
+
+(The case of "1 file in 2 [or more] directories" could, I suppose,
+occur in the case of symlinking or something of the sort.)
+
+It occurs to you that this is not the prettiest code you've ever
+written, but this seems the way to go.  You mail off to the
+translators asking for translations for these four cases.  The
+Chinese guy replies with the one phrase that these all translate to in
+Chinese, and that phrase has two "%g"s in it, as it should -- but
+there's a problem.  He translates it word-for-word back: "In %g
+directories contains %g files match your query."  The %g
+slots are in an order reverse to what they are in English.  You wonder
+how you'll get gettext to handle that.
+
+But you put it aside for the moment, and optimistically hope that the
+other translators won't have this problem, and that their languages
+will be better behaved -- i.e., that they will be just like English.
+
+But the Arabic translator is the next to write back.  First off, your
+code for "I scanned %g directory." or "I scanned %g directories."
+assumes there's only singular or plural.  But, to use linguistic
+jargon again, Arabic has grammatical number, like English (but unlike
+Chinese), but it's a three-term category: singular, dual, and plural.
+In other words, the way you say "directory" depends on whether there's
+one directory, or I<two> of them, or I<more than two> of them.  Your
+test of C<($directory == 1)> no longer does the job.  And it means
+that where English's grammatical category of number necessitates
+only the two permutations of the first sentence based on "directory
+[singular]" and "directories [plural]", Arabic has three -- and,
+worse, in the second sentence ("Your query matched %g file in %g
+directory."), where English has four, Arabic has nine.  You sense
+an unwelcome, exponential trend taking shape.
+
+Your Italian translator emails you back and says that "I searched 0
+directories" (a possible English output of your program) is stilted,
+and if you think that's fine English, that's your problem, but that
+I<just will not do> in the language of Dante.  He insists that where
+$directory_count is 0, your program should produce the Italian text
+for "I I<didn't> scan I<any> directories.".  And ditto for "I didn't
+match any files in any directories", although he says the last part
+about "in any directories" should probably just be left off.
+
+You wonder how you'll get gettext to handle this; to accommodate the
+ways Arabic, Chinese, and Italian deal with numbers in just these few
+very simple phrases, you need to write code that will ask gettext for
+different queries depending on whether the numerical values in
+question are 1, 2, more than 2, or in some cases 0, and you still haven't
+figured out the problem with the different word order in Chinese.
+
+Then your Russian translator calls on the phone, to I<personally> tell
+you the bad news about how really unpleasant your life is about to
+become:
+
+Russian, like German or Latin, is an inflectional language; that is, nouns
+and adjectives have to take endings that depend on their case
+(i.e., nominative, accusative, genitive, etc...) -- which is roughly a matter of
+what role they have in syntax of the sentence --
+as well as on the grammatical gender (i.e., masculine, feminine, neuter)
+and number (i.e., singular or plural) of the noun, as well as on the
+declension class of the noun.  But unlike with most other inflected languages,
+putting a number-phrase (like "ten" or "forty-three", or their Arabic
+numeral equivalents) in front of noun in Russian can change the case and
+number that noun is, and therefore the endings you have to put on it.
+
+He elaborates:  In "I scanned %g directories", you'd I<expect>
+"directories" to be in the accusative case (since it is the direct
+object in the sentence) and the plural number,
+except where $directory_count is 1, then you'd expect the singular, of
+course.  Just like Latin or German.  I<But!>  Where $directory_count %
+10 is 1 ("%" for modulo, remember), assuming $directory count is an
+integer, and except where $directory_count % 100 is 11, "directories"
+is forced to become grammatically singular, which means it gets the
+ending for the accusative singular...  You begin to visualize the code
+it'd take to test for the problem so far, I<and still work for Chinese
+and Arabic and Italian>, and how many gettext items that'd take, but
+he keeps going...  But where $directory_count % 10 is 2, 3, or 4
+(except where $directory_count % 100 is 12, 13, or 14), the word for
+"directories" is forced to be genitive singular -- which means another
+ending... The room begins to spin around you, slowly at first...  But
+with I<all other> integer values, since "directory" is an inanimate
+noun, when preceded by a number and in the nominative or accusative
+cases (as it is here, just your luck!), it does stay plural, but it is
+forced into the genitive case -- yet another ending...  And
+you never hear him get to the part about how you're going to run into
+similar (but maybe subtly different) problems with other Slavic
+languages like Polish, because the floor comes up to meet you, and you
+fade into unconsciousness.
+
+
+The above cautionary tale relates how an attempt at localization can
+lead from programmer consternation, to program obfuscation, to a need
+for sedation.  But careful evaluation shows that your choice of tools
+merely needed further consideration.
+
+=head2 The Linguistic View
+
+=over
+
+"It is more complicated than you think." 
+
+-- The Eighth Networking Truth, from RFC 1925
+
+=back
+
+The field of Linguistics has expended a great deal of effort over the
+past century trying to find grammatical patterns which hold across
+languages; it's been a constant process
+of people making generalizations that should apply to all languages,
+only to find out that, all too often, these generalizations fail --
+sometimes failing for just a few languages, sometimes whole classes of
+languages, and sometimes nearly every language in the world except
+English.  Broad statistical trends are evident in what the "average
+language" is like as far as what its rules can look like, must look
+like, and cannot look like.  But the "average language" is just as
+unreal a concept as the "average person" -- it runs up against the
+fact no language (or person) is, in fact, average.  The wisdom of past
+experience leads us to believe that any given language can do whatever
+it wants, in any order, with appeal to any kind of grammatical
+categories wants -- case, number, tense, real or metaphoric
+characteristics of the things that words refer to, arbitrary or
+predictable classifications of words based on what endings or prefixes
+they can take, degree or means of certainty about the truth of
+statements expressed, and so on, ad infinitum.
+
+Mercifully, most localization tasks are a matter of finding ways to
+translate whole phrases, generally sentences, where the context is
+relatively set, and where the only variation in content is I<usually>
+in a number being expressed -- as in the example sentences above.
+Translating specific, fully-formed sentences is, in practice, fairly
+foolproof -- which is good, because that's what's in the phrasebooks
+that so many tourists rely on.  Now, a given phrase (whether in a
+phrasebook or in a gettext lexicon) in one language I<might> have a
+greater or lesser applicability than that phrase's translation into
+another language -- for example, strictly speaking, in Arabic, the
+"your" in "Your query matched..." would take a different form
+depending on whether the user is male or female; so the Arabic
+translation "your[feminine] query" is applicable in fewer cases than
+the corresponding English phrase, which doesn't distinguish the user's
+gender.  (In practice, it's not feasible to have a program know the
+user's gender, so the masculine "you" in Arabic is usually used, by
+default.)
+
+But in general, such surprises are rare when entire sentences are
+being translated, especially when the functional context is restricted
+to that of a computer interacting with a user either to convey a fact
+or to prompt for a piece of information.  So, for purposes of
+localization, translation by phrase (generally by sentence) is both the
+simplest and the least problematic.
+
+=head2 Breaking gettext
+
+=over
+
+"It Has To Work."
+
+-- First Networking Truth, RFC 1925
+
+=back
+
+Consider that sentences in a tourist phrasebook are of two types: ones
+like "How do I get to the marketplace?" that don't have any blanks to
+fill in, and ones like "How much do these ___ cost?", where there's
+one or more blanks to fill in (and these are usually linked to a
+list of words that you can put in that blank: "fish", "potatoes",
+"tomatoes", etc.).  The ones with no blanks are no problem, but the
+fill-in-the-blank ones may not be really straightforward. If it's a
+Swahili phrasebook, for example, the authors probably didn't bother to
+tell you the complicated ways that the verb "cost" changes its
+inflectional prefix depending on the noun you're putting in the blank.
+The trader in the marketplace will still understand what you're saying if
+you say "how much do these potatoes cost?" with the wrong
+inflectional prefix on "cost".  After all, I<you> can't speak proper Swahili,
+I<you're> just a tourist.  But while tourists can be stupid, computers
+are supposed to be smart; the computer should be able to fill in the
+blank, and still have the results be grammatical.
+
+In other words, a phrasebook entry takes some values as parameters
+(the things that you fill in the blank or blanks), and provides a value
+based on these parameters, where the way you get that final value from
+the given values can, properly speaking, involve an arbitrarily
+complex series of operations.  (In the case of Chinese, it'd be not at
+all complex, at least in cases like the examples at the beginning of
+this article; whereas in the case of Russian it'd be a rather complex
+series of operations.  And in some languages, the
+complexity could be spread around differently: while the act of
+putting a number-expression in front of a noun phrase might not be
+complex by itself, it may change how you have to, for example, inflect
+a verb elsewhere in the sentence.  This is what in syntax is called
+"long-distance dependencies".)
+
+This talk of parameters and arbitrary complexity is just another way
+to say that an entry in a phrasebook is what in a programming language
+would be called a "function".  Just so you don't miss it, this is the
+crux of this article: I<A phrase is a function; a phrasebook is a
+bunch of functions.>
+
+The reason that using gettext runs into walls (as in the above
+second-person horror story) is that you're trying to use a string (or
+worse, a choice among a bunch of strings) to do what you really need a
+function for -- which is futile.  Preforming (s)printf interpolation
+on the strings which you get back from gettext does allow you to do I<some>
+common things passably well... sometimes... sort of; but, to paraphrase
+what some people say about C<csh> script programming, "it fools you
+into thinking you can use it for real things, but you can't, and you
+don't discover this until you've already spent too much time trying,
+and by then it's too late."
+
+=head2 Replacing gettext
+
+So, what needs to replace gettext is a system that supports lexicons
+of functions instead of lexicons of strings.  An entry in a lexicon
+from such a system should I<not> look like this:
+
+  "J'ai trouv\xE9 %g fichiers dans %g r\xE9pertoires"
+
+[\xE9 is e-acute in Latin-1.  Some pod renderers would
+scream if I used the actual character here. -- SB]
+
+but instead like this, bearing in mind that this is just a first stab:
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = sprintf("%g %s", $files,
+      $files == 1 ? 'fichier' : 'fichiers');
+    $dirs = sprintf("%g %s", $dirs,
+      $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+Now, there's no particularly obvious way to store anything but strings
+in a gettext lexicon; so it looks like we just have to start over and
+make something better, from scratch.  I call my shot at a
+gettext-replacement system "Maketext", or, in CPAN terms,
+Locale::Maketext.
+
+When designing Maketext, I chose to plan its main features in terms of
+"buzzword compliance".  And here are the buzzwords:
+
+=head2 Buzzwords: Abstraction and Encapsulation
+
+The complexity of the language you're trying to output a phrase in is
+entirely abstracted inside (and encapsulated within) the Maketext module
+for that interface.  When you call:
+
+  print $lang->maketext("You have [quant,_1,piece] of new mail.",
+                       scalar(@messages));
+
+you don't know (and in fact can't easily find out) whether this will
+involve lots of figuring, as in Russian (if $lang is a handle to the
+Russian module), or relatively little, as in Chinese.  That kind of
+abstraction and encapsulation may encourage other pleasant buzzwords
+like modularization and stratification, depending on what design
+decisions you make.
+
+=head2 Buzzword: Isomorphism
+
+"Isomorphism" means "having the same structure or form"; in discussions
+of program design, the word takes on the special, specific meaning that
+your implementation of a solution to a problem I<has the same
+structure> as, say, an informal verbal description of the solution, or
+maybe of the problem itself.  Isomorphism is, all things considered,
+a good thing -- it's what problem-solving (and solution-implementing)
+should look like.
+
+What's wrong the with gettext-using code like this...
+
+  printf( $file_count == 1 ?
+    ( $directory_count == 1 ?
+     "Your query matched %g file in %g directory." :
+     "Your query matched %g file in %g directories." ) :
+    ( $directory_count == 1 ?
+     "Your query matched %g files in %g directory." :
+     "Your query matched %g files in %g directories." ),
+   $file_count, $directory_count,
+  );
+
+is first off that it's not well abstracted -- these ways of testing
+for grammatical number (as in the expressions like C<foo == 1 ?
+singular_form : plural_form>) should be abstracted to each language
+module, since how you get grammatical number is language-specific.
+
+But second off, it's not isomorphic -- the "solution" (i.e., the
+phrasebook entries) for Chinese maps from these four English phrases to
+the one Chinese phrase that fits for all of them.  In other words, the
+informal solution would be "The way to say what you want in Chinese is
+with the one phrase 'For your question, in Y directories you would
+find X files'" -- and so the implemented solution should be,
+isomorphically, just a straightforward way to spit out that one
+phrase, with numerals properly interpolated.  It shouldn't have to map
+from the complexity of other languages to the simplicity of this one.
+
+=head2 Buzzword: Inheritance
+
+There's a great deal of reuse possible for sharing of phrases between
+modules for related dialects, or for sharing of auxiliary functions
+between related languages.  (By "auxiliary functions", I mean
+functions that don't produce phrase-text, but which, say, return an
+answer to "does this number require a plural noun after it?".  Such
+auxiliary functions would be used in the internal logic of functions
+that actually do produce phrase-text.)
+
+In the case of sharing phrases, consider that you have an interface
+already localized for American English (probably by having been
+written with that as the native locale, but that's incidental).
+Localizing it for UK English should, in practical terms, be just a
+matter of running it past a British person with the instructions to
+indicate what few phrases would benefit from a change in spelling or
+possibly minor rewording.  In that case, you should be able to put in
+the UK English localization module I<only> those phrases that are
+UK-specific, and for all the rest, I<inherit> from the American
+English module.  (And I expect this same situation would apply with
+Brazilian and Continental Portugese, possibly with some I<very>
+closely related languages like Czech and Slovak, and possibly with the
+slightly different "versions" of written Mandarin Chinese, as I hear exist in
+Taiwan and mainland China.)
+
+As to sharing of auxiliary functions, consider the problem of Russian
+numbers from the beginning of this article; obviously, you'd want to
+write only once the hairy code that, given a numeric value, would
+return some specification of which case and number a given quantified
+noun should use.  But suppose that you discover, while localizing an
+interface for, say, Ukranian (a Slavic language related to Russian,
+spoken by several million people, many of whom would be relieved to
+find that your Web site's or software's interface is available in
+their language), that the rules in Ukranian are the same as in Russian
+for quantification, and probably for many other grammatical functions.
+While there may well be no phrases in common between Russian and
+Ukranian, you could still choose to have the Ukranian module inherit
+from the Russian module, just for the sake of inheriting all the
+various grammatical methods.  Or, probably better organizationally,
+you could move those functions to a module called C<_E_Slavic> or
+something, which Russian and Ukrainian could inherit useful functions
+from, but which would (presumably) provide no lexicon.
+
+=head2 Buzzword: Concision
+
+Okay, concision isn't a buzzword.  But it should be, so I decree that
+as a new buzzword, "concision" means that simple common things should
+be expressible in very few lines (or maybe even just a few characters)
+of code -- call it a special case of "making simple things easy and
+hard things possible", and see also the role it played in the
+MIDI::Simple language, discussed elsewhere in this issue [TPJ#13].
+
+Consider our first stab at an entry in our "phrasebook of functions":
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = sprintf("%g %s", $files,
+      $files == 1 ? 'fichier' : 'fichiers');
+    $dirs = sprintf("%g %s", $dirs,
+      $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+You may sense that a lexicon (to use a non-committal catch-all term for a
+collection of things you know how to say, regardless of whether they're
+phrases or words) consisting of functions I<expressed> as above would
+make for rather long-winded and repetitive code -- even if you wisely
+rewrote this to have quantification (as we call adding a number
+expression to a noun phrase) be a function called like:
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = quant($files, "fichier");
+    $dirs =  quant($dirs,  "r\xE9pertoire");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+And you may also sense that you do not want to bother your translators
+with having to write Perl code -- you'd much rather that they spend
+their I<very costly time> on just translation.  And this is to say
+nothing of the near impossibility of finding a commercial translator
+who would know even simple Perl.
+
+In a first-hack implementation of Maketext, each language-module's
+lexicon looked like this:
+
+ %Lexicon = (
+   "I found %g files in %g directories"
+   => sub {
+      my( $files, $dirs ) = @_[0,1];
+      $files = quant($files, "fichier");
+      $dirs =  quant($dirs,  "r\xE9pertoire");
+      return "J'ai trouv\xE9 $files dans $dirs.";
+    },
+  ... and so on with other phrase => sub mappings ...
+ );
+
+but I immediately went looking for some more concise way to basically
+denote the same phrase-function -- a way that would also serve to
+concisely denote I<most> phrase-functions in the lexicon for I<most>
+languages.  After much time and even some actual thought, I decided on
+this system:
+
+* Where a value in a %Lexicon hash is a contentful string instead of
+an anonymous sub (or, conceivably, a coderef), it would be interpreted
+as a sort of shorthand expression of what the sub does.  When accessed
+for the first time in a session, it is parsed, turned into Perl code,
+and then eval'd into an anonymous sub; then that sub replaces the
+original string in that lexicon.  (That way, the work of parsing and
+evaling the shorthand form for a given phrase is done no more than
+once per session.)
+
+* Calls to C<maketext> (as Maketext's main function is called) happen
+thru a "language session handle", notionally very much like an IO
+handle, in that you open one at the start of the session, and use it
+for "sending signals" to an object in order to have it return the text
+you want.
+
+So, this:
+
+  $lang->maketext("You have [quant,_1,piece] of new mail.",
+                 scalar(@messages));
+
+basically means this: look in the lexicon for $lang (which may inherit
+from any number of other lexicons), and find the function that we
+happen to associate with the string "You have [quant,_1,piece] of new
+mail" (which is, and should be, a functioning "shorthand" for this
+function in the native locale -- English in this case).  If you find
+such a function, call it with $lang as its first parameter (as if it
+were a method), and then a copy of scalar(@messages) as its second,
+and then return that value.  If that function was found, but was in
+string shorthand instead of being a fully specified function, parse it
+and make it into a function before calling it the first time.
+
+* The shorthand uses code in brackets to indicate method calls that
+should be performed.  A full explanation is not in order here, but a
+few examples will suffice:
+
+  "You have [quant,_1,piece] of new mail."
+
+The above code is shorthand for, and will be interpreted as,
+this:
+
+  sub {
+    my $handle = $_[0];
+    my(@params) = @_;
+    return join '',
+      "You have ",
+      $handle->quant($params[1], 'piece'),
+      "of new mail.";
+  }
+
+where "quant" is the name of a method you're using to quantify the
+noun "piece" with the number $params[0].
+
+A string with no brackety calls, like this:
+
+  "Your search expression was malformed."
+
+is somewhat of a degenerate case, and just gets turned into:
+
+  sub { return "Your search expression was malformed." }
+
+However, not everything you can write in Perl code can be written in
+the above shorthand system -- not by a long shot.  For example, consider
+the Italian translator from the beginning of this article, who wanted
+the Italian for "I didn't find any files" as a special case, instead
+of "I found 0 files".  That couldn't be specified (at least not easily
+or simply) in our shorthand system, and it would have to be written
+out in full, like this:
+
+  sub {  # pretend the English strings are in Italian
+    my($handle, $files, $dirs) = @_[0,1,2];
+    return "I didn't find any files" unless $files;
+    return join '',
+      "I found ",
+      $handle->quant($files, 'file'),
+      " in ",
+      $handle->quant($dirs,  'directory'),
+      ".";
+  }
+
+Next to a lexicon full of shorthand code, that sort of sticks out like a
+sore thumb -- but this I<is> a special case, after all; and at least
+it's possible, if not as concise as usual.
+
+As to how you'd implement the Russian example from the beginning of
+the article, well, There's More Than One Way To Do It, but it could be
+something like this (using English words for Russian, just so you know
+what's going on):
+
+  "I [quant,_1,directory,accusative] scanned."
+
+This shifts the burden of complexity off to the quant method.  That
+method's parameters are: the numeric value it's going to use to
+quantify something; the Russian word it's going to quantify; and the
+parameter "accusative", which you're using to mean that this
+sentence's syntax wants a noun in the accusative case there, although
+that quantification method may have to overrule, for grammatical
+reasons you may recall from the beginning of this article.
+
+Now, the Russian quant method here is responsible not only for
+implementing the strange logic necessary for figuring out how Russian
+number-phrases impose case and number on their noun-phrases, but also
+for inflecting the Russian word for "directory".  How that inflection
+is to be carried out is no small issue, and among the solutions I've
+seen, some (like variations on a simple lookup in a hash where all
+possible forms are provided for all necessary words) are
+straightforward but I<can> become cumbersome when you need to inflect
+more than a few dozen words; and other solutions (like using
+algorithms to model the inflections, storing only root forms and
+irregularities) I<can> involve more overhead than is justifiable for
+all but the largest lexicons.
+
+Mercifully, this design decision becomes crucial only in the hairiest
+of inflected languages, of which Russian is by no means the I<worst> case
+scenario, but is worse than most.  Most languages have simpler
+inflection systems; for example, in English or Swahili, there are
+generally no more than two possible inflected forms for a given noun
+("error/errors"; "kosa/makosa"), and the
+rules for producing these forms are fairly simple -- or at least,
+simple rules can be formulated that work for most words, and you can
+then treat the exceptions as just "irregular", at least relative to
+your ad hoc rules.  A simpler inflection system (simpler rules, fewer
+forms) means that design decisions are less crucial to maintaining
+sanity, whereas the same decisions could incur
+overhead-versus-scalability problems in languages like Russian.  It
+may I<also> be likely that code (possibly in Perl, as with
+Lingua::EN::Inflect, for English nouns) has already
+been written for the language in question, whether simple or complex.
+
+Moreover, a third possibility may even be simpler than anything
+discussed above: "Just require that all possible (or at least
+applicable) forms be provided in the call to the given language's quant
+method, as in:"
+
+  "I found [quant,_1,file,files]."
+
+That way, quant just has to chose which form it needs, without having
+to look up or generate anything.  While possibly not optimal for
+Russian, this should work well for most other languages, where
+quantification is not as complicated an operation.
+
+=head2 The Devil in the Details
+
+There's plenty more to Maketext than described above -- for example,
+there's the details of how language tags ("en-US", "i-pwn", "fi",
+etc.) or locale IDs ("en_US") interact with actual module naming
+("BogoQuery/Locale/en_us.pm"), and what magic can ensue; there's the
+details of how to record (and possibly negotiate) what character
+encoding Maketext will return text in (UTF8? Latin-1? KOI8?).  There's
+the interesting fact that Maketext is for localization, but nowhere
+actually has a "C<use locale;>" anywhere in it.  For the curious,
+there's the somewhat frightening details of how I actually
+implement something like data inheritance so that searches across
+modules' %Lexicon hashes can parallel how Perl implements method
+inheritance.
+
+And, most importantly, there's all the practical details of how to
+actually go about deriving from Maketext so you can use it for your
+interfaces, and the various tools and conventions for starting out and
+maintaining individual language modules.
+
+That is all covered in the documentation for Locale::Maketext and the
+modules that come with it, available in CPAN.  After having read this
+article, which covers the why's of Maketext, the documentation,
+which covers the how's of it, should be quite straightforward.
+
+=head2 The Proof in the Pudding: Localizing Web Sites
+
+Maketext and gettext have a notable difference: gettext is in C,
+accessible thru C library calls, whereas Maketext is in Perl, and
+really can't work without a Perl interpreter (although I suppose
+something like it could be written for C).  Accidents of history (and
+not necessarily lucky ones) have made C++ the most common language for
+the implementation of applications like word processors, Web browsers,
+and even many in-house applications like custom query systems.  Current
+conditions make it somewhat unlikely that the next one of any of these
+kinds of applications will be written in Perl, albeit clearly more for
+reasons of custom and inertia than out of consideration of what is the
+right tool for the job.
+
+However, other accidents of history have made Perl a well-accepted
+language for design of server-side programs (generally in CGI form)
+for Web site interfaces.  Localization of static pages in Web sites is
+trivial, feasible either with simple language-negotiation features in
+servers like Apache, or with some kind of server-side inclusions of
+language-appropriate text into layout templates.  However, I think
+that the localization of Perl-based search systems (or other kinds of
+dynamic content) in Web sites, be they public or access-restricted,
+is where Maketext will see the greatest use.
+
+I presume that it would be only the exceptional Web site that gets
+localized for English I<and> Chinese I<and> Italian I<and> Arabic
+I<and> Russian, to recall the languages from the beginning of this
+article -- to say nothing of German, Spanish, French, Japanese,
+Finnish, and Hindi, to name a few languages that benefit from large
+numbers of programmers or Web viewers or both.
+
+However, the ever-increasing internationalization of the Web (whether
+measured in terms of amount of content, of numbers of content writers
+or programmers, or of size of content audiences) makes it increasingly
+likely that the interface to the average Web-based dynamic content
+service will be localized for two or maybe three languages.  It is my
+hope that Maketext will make that task as simple as possible, and will
+remove previous barriers to localization for languages dissimilar to
+English.
+
+ __END__
+
+Sean M. Burke (sburkeE<64>cpan.org) has a Master's in linguistics
+from Northwestern University; he specializes in language technology.
+Jordan Lachler (lachlerE<64>unm.edu) is a PhD student in the Department of
+Linguistics at the University of New Mexico; he specializes in
+morphology and pedagogy of North American native languages.
+
+=head2 References
+
+Alvestrand, Harald Tveit.  1995.  I<RFC 1766: Tags for the
+Identification of Languages.>
+C<L<http://www.ietf.org/rfc/rfc1766.txt>>
+[Now see RFC 3066.]
+
+Callon, Ross, editor.  1996.  I<RFC 1925: The Twelve
+Networking Truths.>
+C<L<http://www.ietf.org/rfc/rfc1925.txt>>
+
+Drepper, Ulrich, Peter Miller,
+and FranE<ccedil>ois Pinard.  1995-2001.  GNU
+C<gettext>.  Available in C<L<ftp://prep.ai.mit.edu/pub/gnu/>>, with
+extensive docs in the distribution tarball.  [Since
+I wrote this article in 1998, I now see that the
+gettext docs are now trying more to come to terms with
+plurality.  Whether useful conclusions have come from it
+is another question altogether. -- SMB, May 2001]
+
+Forbes, Nevill.  1964.  I<Russian Grammar.>  Third Edition, revised
+by J. C. Dumbreck.  Oxford University Press.
+
+=cut
+
+#End
+