Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/lib/I18N/Collate.pm
+++ b/database/perl/lib/I18N/Collate.pm
@@ -0,0 +1,196 @@
+package I18N::Collate;
+
+use strict;
+our $VERSION = '1.02';
+
+=head1 NAME
+
+I18N::Collate - compare 8-bit scalar data according to the current locale
+
+=head1 SYNOPSIS
+
+    use I18N::Collate;
+    setlocale(LC_COLLATE, 'locale-of-your-choice');
+    $s1 = I18N::Collate->new("scalar_data_1");
+    $s2 = I18N::Collate->new("scalar_data_2");
+
+=head1 DESCRIPTION
+
+  ***
+
+  WARNING: starting from the Perl version 5.003_06
+  the I18N::Collate interface for comparing 8-bit scalar data
+  according to the current locale
+
+	HAS BEEN DEPRECATED
+
+  That is, please do not use it anymore for any new applications
+  and please migrate the old applications away from it because its
+  functionality was integrated into the Perl core language in the
+  release 5.003_06.
+
+  See the perllocale manual page for further information.
+
+  ***
+
+This module provides you with objects that will collate
+according to your national character set, provided that the
+POSIX setlocale() function is supported on your system.
+
+You can compare $s1 and $s2 above with
+
+    $s1 le $s2
+
+to extract the data itself, you'll need a dereference: $$s1
+
+This module uses POSIX::setlocale(). The basic collation conversion is
+done by strxfrm() which terminates at NUL characters being a decent C
+routine.  collate_xfrm() handles embedded NUL characters gracefully.
+
+The available locales depend on your operating system; try whether
+C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
+direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
+C<ls /usr/lib/locale>.  Not all the locales that your vendor supports
+are necessarily installed: please consult your operating system's
+documentation and possibly your local system administration.  The
+locale names are probably something like C<xx_XX.(ISO)?8859-N> or
+C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
+variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
+European character set.
+
+=cut
+
+# I18N::Collate.pm
+#
+# Author:	Jarkko Hietaniemi <F<jhi@iki.fi>>
+#		Helsinki University of Technology, Finland
+#
+# Acks:		Guy Decoux <F<decoux@moulon.inra.fr>> understood
+#		overloading magic much deeper than I and told
+#		how to cut the size of this code by more than half.
+#		(my first version did overload all of lt gt eq le ge cmp)
+#
+# Purpose:      compare 8-bit scalar data according to the current locale
+#
+# Requirements:	Perl5 POSIX::setlocale() and POSIX::strxfrm()
+#
+# Exports:	setlocale 1)
+#		collate_xfrm 2)
+#
+# Overloads:	cmp # 3)
+#
+# Usage:	use I18N::Collate;
+#	        setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
+#		$s1 = I18N::Collate->("scalar_data_1");
+#		$s2 = I18N::Collate->("scalar_data_2");
+#
+#		now you can compare $s1 and $s2: $s1 le $s2
+#		to extract the data itself, you need to deref: $$s1
+#
+# Notes:
+#		1) this uses POSIX::setlocale
+#		2) the basic collation conversion is done by strxfrm() which
+#		   terminates at NUL characters being a decent C routine.
+#		   collate_xfrm handles embedded NUL characters gracefully.
+#		3) due to cmp and overload magic, lt le eq ge gt work also
+#		4) the available locales depend on your operating system;
+#		   try whether "locale -a" shows them or man pages for
+#		   "locale" or "nlsinfo" work or the more direct
+#		   approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
+#		   Not all the locales that your vendor supports
+#		   are necessarily installed: please consult your
+#		   operating system's documentation.
+#		   The locale names are probably something like
+#		   'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
+#		   for example 'fr_CH.ISO8859-1' is the Swiss (CH)
+#		   variant of French (fr), ISO Latin (8859) 1 (-1)
+#		   which is the Western European character set.
+#
+# Updated:	19961005
+#
+# ---
+
+use POSIX qw(strxfrm LC_COLLATE);
+use warnings::register;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
+our @EXPORT_OK = qw();
+
+use overload qw(
+fallback	1
+cmp		collate_cmp
+);
+
+our($LOCALE, $C);
+
+our $please_use_I18N_Collate_even_if_deprecated = 0;
+sub new {
+  my $new = $_[1];
+
+  if (warnings::enabled() && $] >= 5.003_06) {
+    unless ($please_use_I18N_Collate_even_if_deprecated) {
+      warnings::warn <<___EOD___;
+***
+
+  WARNING: starting from the Perl version 5.003_06
+  the I18N::Collate interface for comparing 8-bit scalar data
+  according to the current locale
+
+	HAS BEEN DEPRECATED
+
+  That is, please do not use it anymore for any new applications
+  and please migrate the old applications away from it because its
+  functionality was integrated into the Perl core language in the
+  release 5.003_06.
+
+  See the perllocale manual page for further information.
+
+***
+___EOD___
+      $please_use_I18N_Collate_even_if_deprecated++;
+    }
+  }
+
+  bless \$new;
+}
+
+sub setlocale {
+ my ($category, $locale) = @_[0,1];
+
+ POSIX::setlocale($category, $locale) if (defined $category);
+ # the current $LOCALE
+ $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
+}
+
+sub C {
+  my $s = ${$_[0]};
+
+  $C->{$LOCALE}->{$s} = collate_xfrm($s)
+    unless (defined $C->{$LOCALE}->{$s}); # cache when met
+
+  $C->{$LOCALE}->{$s};
+}
+
+sub collate_xfrm {
+  my $s = $_[0];
+  my $x = '';
+
+  for (split(/(\000+)/, $s)) {
+    $x .= (/^\000/) ? $_ : strxfrm("$_\000");
+  }
+
+  $x;
+}
+
+sub collate_cmp {
+  &C($_[0]) cmp &C($_[1]);
+}
+
+# init $LOCALE
+
+&I18N::Collate::setlocale();
+
+1; # keep require happy
--- a/database/perl/lib/I18N/LangTags.pm
+++ b/database/perl/lib/I18N/LangTags.pm
@@ -0,0 +1,887 @@
+
+# Time-stamp: "2004-10-06 23:26:33 ADT"
+# Sean M. Burke <sburke@cpan.org>
+
+require 5.000;
+package I18N::LangTags;
+use strict;
+require Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT = qw();
+our @EXPORT_OK = qw(is_language_tag same_language_tag
+                extract_language_tags super_languages
+                similarity_language_tag is_dialect_of
+                locale2language_tag alternate_language_tags
+                encode_language_tag panic_languages
+                implicate_supers
+                implicate_supers_strictly
+               );
+our %EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
+
+our $VERSION = "0.44";
+our %Panic;
+
+sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function
+
+
+=head1 NAME
+
+I18N::LangTags - functions for dealing with RFC3066-style language tags
+
+=head1 SYNOPSIS
+
+  use I18N::LangTags();
+
+...or specify whichever of those functions you want to import, like so:
+
+  use I18N::LangTags qw(implicate_supers similarity_language_tag);
+
+All the exportable functions are listed below -- you're free to import
+only some, or none at all.  By default, none are imported.  If you
+say:
+
+    use I18N::LangTags qw(:ALL)
+
+...then all are exported.  (This saves you from having to use
+something less obvious like C<use I18N::LangTags qw(/./)>.)
+
+If you don't import any of these functions, assume a C<&I18N::LangTags::>
+in front of all the function names in the following examples.
+
+=head1 DESCRIPTION
+
+Language tags are a formalism, described in RFC 3066 (obsoleting
+1766), for declaring what language form (language and possibly
+dialect) a given chunk of information is in.
+
+This library provides functions for common tasks involving language
+tags as they are needed in a variety of protocols and applications.
+
+Please see the "See Also" references for a thorough explanation
+of how to correctly use language tags.
+
+=over
+
+=cut
+
+###########################################################################
+
+=item * the function is_language_tag($lang1)
+
+Returns true iff $lang1 is a formally valid language tag.
+
+   is_language_tag("fr")            is TRUE
+   is_language_tag("x-jicarilla")   is FALSE
+       (Subtags can be 8 chars long at most -- 'jicarilla' is 9)
+
+   is_language_tag("sgn-US")    is TRUE
+       (That's American Sign Language)
+
+   is_language_tag("i-Klikitat")    is TRUE
+       (True without regard to the fact noone has actually
+        registered Klikitat -- it's a formally valid tag)
+
+   is_language_tag("fr-patois")     is TRUE
+       (Formally valid -- altho descriptively weak!)
+
+   is_language_tag("Spanish")       is FALSE
+   is_language_tag("french-patois") is FALSE
+       (No good -- first subtag has to match
+        /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)
+
+   is_language_tag("x-borg-prot2532") is TRUE
+       (Yes, subtags can contain digits, as of RFC3066)
+
+=cut
+
+sub is_language_tag {
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  my($tag) = lc($_[0]);
+
+  return 0 if $tag eq "i" or $tag eq "x";
+  # Bad degenerate cases that the following
+  #  regexp would erroneously let pass
+
+  return $tag =~ 
+    /^(?:  # First subtag
+         [xi] | [a-z]{2,3}
+      )
+      (?:  # Subtags thereafter
+         -           # separator
+         [a-z0-9]{1,8}  # subtag  
+      )*
+    $/xs ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function extract_language_tags($whatever)
+
+Returns a list of whatever looks like formally valid language tags
+in $whatever.  Not very smart, so don't get too creative with
+what you want to feed it.
+
+  extract_language_tags("fr, fr-ca, i-mingo")
+    returns:   ('fr', 'fr-ca', 'i-mingo')
+
+  extract_language_tags("It's like this: I'm in fr -- French!")
+    returns:   ('It', 'in', 'fr')
+  (So don't just feed it any old thing.)
+
+The output is untainted.  If you don't know what tainting is,
+don't worry about it.
+
+=cut
+
+sub extract_language_tags {
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  my($text) =
+    $_[0] =~ m/(.+)/  # to make for an untainted result
+    ? $1 : ''
+  ;
+  
+  return grep(!m/^[ixIX]$/s, # 'i' and 'x' aren't good tags
+    $text =~ 
+    m/
+      \b
+      (?:  # First subtag
+         [iIxX] | [a-zA-Z]{2,3}
+      )
+      (?:  # Subtags thereafter
+         -           # separator
+         [a-zA-Z0-9]{1,8}  # subtag  
+      )*
+      \b
+    /xsg
+  );
+}
+
+###########################################################################
+
+=item * the function same_language_tag($lang1, $lang2)
+
+Returns true iff $lang1 and $lang2 are acceptable variant tags
+representing the same language-form.
+
+   same_language_tag('x-kadara', 'i-kadara')  is TRUE
+      (The x/i- alternation doesn't matter)
+   same_language_tag('X-KADARA', 'i-kadara')  is TRUE
+      (...and neither does case)
+   same_language_tag('en',       'en-US')     is FALSE
+      (all-English is not the SAME as US English)
+   same_language_tag('x-kadara', 'x-kadar')   is FALSE
+      (these are totally unrelated tags)
+   same_language_tag('no-bok',    'nb')       is TRUE
+      (no-bok is a legacy tag for nb (Norwegian Bokmal))
+
+C<same_language_tag> works by just seeing whether
+C<encode_language_tag($lang1)> is the same as
+C<encode_language_tag($lang2)>.
+
+(Yes, I know this function is named a bit oddly.  Call it historic
+reasons.)
+
+=cut
+
+sub same_language_tag {
+  my $el1 = &encode_language_tag($_[0]);
+  return 0 unless defined $el1;
+   # this avoids the problem of
+   # encode_language_tag($lang1) eq and encode_language_tag($lang2)
+   # being true if $lang1 and $lang2 are both undef
+
+  return $el1 eq &encode_language_tag($_[1]) ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function similarity_language_tag($lang1, $lang2)
+
+Returns an integer representing the degree of similarity between
+tags $lang1 and $lang2 (the order of which does not matter), where
+similarity is the number of common elements on the left,
+without regard to case and to x/i- alternation.
+
+   similarity_language_tag('fr', 'fr-ca')           is 1
+      (one element in common)
+   similarity_language_tag('fr-ca', 'fr-FR')        is 1
+      (one element in common)
+
+   similarity_language_tag('fr-CA-joual',
+                           'fr-CA-PEI')             is 2
+   similarity_language_tag('fr-CA-joual', 'fr-CA')  is 2
+      (two elements in common)
+
+   similarity_language_tag('x-kadara', 'i-kadara')  is 1
+      (x/i- doesn't matter)
+
+   similarity_language_tag('en',       'x-kadar')   is 0
+   similarity_language_tag('x-kadara', 'x-kadar')   is 0
+      (unrelated tags -- no similarity)
+
+   similarity_language_tag('i-cree-syllabic',
+                           'i-cherokee-syllabic')   is 0
+      (no B<leftmost> elements in common!)
+
+=cut
+
+sub similarity_language_tag {
+  my $lang1 = &encode_language_tag($_[0]);
+  my $lang2 = &encode_language_tag($_[1]);
+   # And encode_language_tag takes care of the whole
+   #  no-nyn==nn, i-hakka==zh-hakka, etc, things
+   
+  # NB: (i-sil-...)?  (i-sgn-...)?
+
+  return undef if !defined($lang1) and !defined($lang2);
+  return 0 if !defined($lang1) or !defined($lang2);
+
+  my @l1_subtags = split('-', $lang1);
+  my @l2_subtags = split('-', $lang2);
+  my $similarity = 0;
+
+  while(@l1_subtags and @l2_subtags) {
+    if(shift(@l1_subtags) eq shift(@l2_subtags)) {
+      ++$similarity;
+    } else {
+      last;
+    } 
+  }
+  return $similarity;
+}
+
+###########################################################################
+
+=item * the function is_dialect_of($lang1, $lang2)
+
+Returns true iff language tag $lang1 represents a subform of
+language tag $lang2.
+
+B<Get the order right!  It doesn't work the other way around!>
+
+   is_dialect_of('en-US', 'en')            is TRUE
+     (American English IS a dialect of all-English)
+
+   is_dialect_of('fr-CA-joual', 'fr-CA')   is TRUE
+   is_dialect_of('fr-CA-joual', 'fr')      is TRUE
+     (Joual is a dialect of (a dialect of) French)
+
+   is_dialect_of('en', 'en-US')            is FALSE
+     (all-English is a NOT dialect of American English)
+
+   is_dialect_of('fr', 'en-CA')            is FALSE
+
+   is_dialect_of('en',    'en'   )         is TRUE
+   is_dialect_of('en-US', 'en-US')         is TRUE
+     (B<Note:> these are degenerate cases)
+
+   is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
+     (the x/i thing doesn't matter, nor does case)
+
+   is_dialect_of('nn', 'no')               is TRUE
+     (because 'nn' (New Norse) is aliased to 'no-nyn',
+      as a special legacy case, and 'no-nyn' is a
+      subform of 'no' (Norwegian))
+
+=cut
+
+sub is_dialect_of {
+
+  my $lang1 = &encode_language_tag($_[0]);
+  my $lang2 = &encode_language_tag($_[1]);
+
+  return undef if !defined($lang1) and !defined($lang2);
+  return 0 if !defined($lang1) or !defined($lang2);
+
+  return 1 if $lang1 eq $lang2;
+  return 0 if length($lang1) < length($lang2);
+
+  $lang1 .= '-';
+  $lang2 .= '-';
+  return
+    (substr($lang1, 0, length($lang2)) eq $lang2) ? 1 : 0;
+}
+
+###########################################################################
+
+=item * the function super_languages($lang1)
+
+Returns a list of language tags that are superordinate tags to $lang1
+-- it gets this by removing subtags from the end of $lang1 until
+nothing (or just "i" or "x") is left.
+
+   super_languages("fr-CA-joual")  is  ("fr-CA", "fr")
+
+   super_languages("en-AU")  is  ("en")
+
+   super_languages("en")  is  empty-list, ()
+
+   super_languages("i-cherokee")  is  empty-list, ()
+    ...not ("i"), which would be illegal as well as pointless.
+
+If $lang1 is not a valid language tag, returns empty-list in
+a list context, undef in a scalar context.
+
+A notable and rather unavoidable problem with this method:
+"x-mingo-tom" has an "x" because the whole tag isn't an
+IANA-registered tag -- but super_languages('x-mingo-tom') is
+('x-mingo') -- which isn't really right, since 'i-mingo' is
+registered.  But this module has no way of knowing that.  (But note
+that same_language_tag('x-mingo', 'i-mingo') is TRUE.)
+
+More importantly, you assume I<at your peril> that superordinates of
+$lang1 are mutually intelligible with $lang1.  Consider this
+carefully.
+
+=cut 
+
+sub super_languages {
+  my $lang1 = $_[0];
+  return() unless defined($lang1) && &is_language_tag($lang1);
+
+  # a hack for those annoying new (2001) tags:
+  $lang1 =~ s/^nb\b/no-bok/i; # yes, backwards
+  $lang1 =~ s/^nn\b/no-nyn/i; # yes, backwards
+  $lang1 =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way
+   # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark
+
+  my @l1_subtags = split('-', $lang1);
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  # NB: (i-sil-...)?
+
+  my @supers = ();
+  foreach my $bit (@l1_subtags) {
+    push @supers, 
+      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
+  }
+  pop @supers if @supers;
+  shift @supers if @supers && $supers[0] =~ m<^[iIxX]$>s;
+  return reverse @supers;
+}
+
+###########################################################################
+
+=item * the function locale2language_tag($locale_identifier)
+
+This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")
+and maps it to a language tag.  If it's not mappable (as with,
+notably, "C" and "POSIX"), this returns empty-list in a list context,
+or undef in a scalar context.
+
+   locale2language_tag("en") is "en"
+
+   locale2language_tag("en_US") is "en-US"
+
+   locale2language_tag("en_US.ISO8859-1") is "en-US"
+
+   locale2language_tag("C") is undef or ()
+
+   locale2language_tag("POSIX") is undef or ()
+
+   locale2language_tag("POSIX") is undef or ()
+
+I'm not totally sure that locale names map satisfactorily to language
+tags.  Think REAL hard about how you use this.  YOU HAVE BEEN WARNED.
+
+The output is untainted.  If you don't know what tainting is,
+don't worry about it.
+
+=cut 
+
+sub locale2language_tag {
+  my $lang =
+    $_[0] =~ m/(.+)/  # to make for an untainted result
+    ? $1 : ''
+  ;
+
+  return $lang if &is_language_tag($lang); # like "en"
+
+  $lang =~ tr<_><->;  # "en_US" -> en-US
+  $lang =~ s<(?:[\.\@][-_a-zA-Z0-9]+)+$><>s;  # "en_US.ISO8859-1" -> en-US
+   # it_IT.utf8@euro => it-IT
+
+  return $lang if &is_language_tag($lang);
+
+  return;
+}
+
+###########################################################################
+
+=item * the function encode_language_tag($lang1)
+
+This function, if given a language tag, returns an encoding of it such
+that:
+
+* tags representing different languages never get the same encoding.
+
+* tags representing the same language always get the same encoding.
+
+* an encoding of a formally valid language tag always is a string
+value that is defined, has length, and is true if considered as a
+boolean.
+
+Note that the encoding itself is B<not> a formally valid language tag.
+Note also that you cannot, currently, go from an encoding back to a
+language tag that it's an encoding of.
+
+Note also that you B<must> consider the encoded value as atomic; i.e.,
+you should not consider it as anything but an opaque, unanalysable
+string value.  (The internals of the encoding method may change in
+future versions, as the language tagging standard changes over time.)
+
+C<encode_language_tag> returns undef if given anything other than a
+formally valid language tag.
+
+The reason C<encode_language_tag> exists is because different language
+tags may represent the same language; this is normally treatable with
+C<same_language_tag>, but consider this situation:
+
+You have a data file that expresses greetings in different languages.
+Its format is "[language tag]=[how to say 'Hello']", like:
+
+          en-US=Hiho
+          fr=Bonjour
+          i-mingo=Hau'
+
+And suppose you write a program that reads that file and then runs as
+a daemon, answering client requests that specify a language tag and
+then expect the string that says how to greet in that language.  So an
+interaction looks like:
+
+          greeting-client asks:    fr
+          greeting-server answers: Bonjour
+
+So far so good.  But suppose the way you're implementing this is:
+
+          my %greetings;
+          die unless open(IN, "<", "in.dat");
+          while(<IN>) {
+            chomp;
+            next unless /^([^=]+)=(.+)/s;
+            my($lang, $expr) = ($1, $2);
+            $greetings{$lang} = $expr;
+          }
+          close(IN);
+
+at which point %greetings has the contents:
+
+          "en-US"   => "Hiho"
+          "fr"      => "Bonjour"
+          "i-mingo" => "Hau'"
+
+And suppose then that you answer client requests for language $wanted
+by just looking up $greetings{$wanted}.
+
+If the client asks for "fr", that will look up successfully in
+%greetings, to the value "Bonjour".  And if the client asks for
+"i-mingo", that will look up successfully in %greetings, to the value
+"Hau'".
+
+But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the
+lookup in %greetings fails.  That's the Wrong Thing.
+
+You could instead do lookups on $wanted with:
+
+          use I18N::LangTags qw(same_language_tag);
+          my $response = '';
+          foreach my $l2 (keys %greetings) {
+            if(same_language_tag($wanted, $l2)) {
+              $response = $greetings{$l2};
+              last;
+            }
+          }
+
+But that's rather inefficient.  A better way to do it is to start your
+program with:
+
+          use I18N::LangTags qw(encode_language_tag);
+          my %greetings;
+          die unless open(IN, "<", "in.dat");
+          while(<IN>) {
+            chomp;
+            next unless /^([^=]+)=(.+)/s;
+            my($lang, $expr) = ($1, $2);
+            $greetings{
+                        encode_language_tag($lang)
+                      } = $expr;
+          }
+          close(IN);
+
+and then just answer client requests for language $wanted by just
+looking up
+
+          $greetings{encode_language_tag($wanted)}
+
+And that does the Right Thing.
+
+=cut
+
+sub encode_language_tag {
+  # Only similarity_language_tag() is allowed to analyse encodings!
+
+  ## Changes in the language tagging standards may have to be reflected here.
+
+  my($tag) = $_[0] || return undef;
+  return undef unless &is_language_tag($tag);
+
+  # For the moment, these legacy variances are few enough that
+  #  we can just handle them here with regexps.
+  $tag =~ s/^iw\b/he/i; # Hebrew
+  $tag =~ s/^in\b/id/i; # Indonesian
+  $tag =~ s/^cre\b/cr/i; # Cree
+  $tag =~ s/^jw\b/jv/i; # Javanese
+  $tag =~ s/^[ix]-lux\b/lb/i;  # Luxemburger
+  $tag =~ s/^[ix]-navajo\b/nv/i;  # Navajo
+  $tag =~ s/^ji\b/yi/i;  # Yiddish
+  # SMB 2003 -- Hm.  There's a bunch of new XXX->YY variances now,
+  #  but maybe they're all so obscure I can ignore them.   "Obscure"
+  #  meaning either that the language is obscure, and/or that the
+  #  XXX form was extant so briefly that it's unlikely it was ever
+  #  used.  I hope.
+  #
+  # These go FROM the simplex to complex form, to get
+  #  similarity-comparison right.  And that's okay, since
+  #  similarity_language_tag is the only thing that
+  #  analyzes our output.
+  $tag =~ s/^[ix]-hakka\b/zh-hakka/i;  # Hakka
+  $tag =~ s/^nb\b/no-bok/i;  # BACKWARDS for Bokmal
+  $tag =~ s/^nn\b/no-nyn/i;  # BACKWARDS for Nynorsk
+
+  $tag =~ s/^[xiXI]-//s;
+   # Just lop off any leading "x/i-"
+
+  return "~" . uc($tag);
+}
+
+#--------------------------------------------------------------------------
+
+=item * the function alternate_language_tags($lang1)
+
+This function, if given a language tag, returns all language tags that
+are alternate forms of this language tag.  (I.e., tags which refer to
+the same language.)  This is meant to handle legacy tags caused by
+the minor changes in language tag standards over the years; and
+the x-/i- alternation is also dealt with.
+
+Note that this function does I<not> try to equate new (and never-used,
+and unusable)
+ISO639-2 three-letter tags to old (and still in use) ISO639-1
+two-letter equivalents -- like "ara" -> "ar" -- because
+"ara" has I<never> been in use as an Internet language tag,
+and RFC 3066 stipulates that it never should be, since a shorter
+tag ("ar") exists.
+
+Examples:
+
+  alternate_language_tags('no-bok')       is ('nb')
+  alternate_language_tags('nb')           is ('no-bok')
+  alternate_language_tags('he')           is ('iw')
+  alternate_language_tags('iw')           is ('he')
+  alternate_language_tags('i-hakka')      is ('zh-hakka', 'x-hakka')
+  alternate_language_tags('zh-hakka')     is ('i-hakka', 'x-hakka')
+  alternate_language_tags('en')           is ()
+  alternate_language_tags('x-mingo-tom')  is ('i-mingo-tom')
+  alternate_language_tags('x-klikitat')   is ('i-klikitat')
+  alternate_language_tags('i-klikitat')   is ('x-klikitat')
+
+This function returns empty-list if given anything other than a formally
+valid language tag.
+
+=cut
+
+my %alt = qw( i x   x i   I X   X I );
+sub alternate_language_tags {
+  my $tag = $_[0];
+  return() unless &is_language_tag($tag);
+
+  my @em; # push 'em real goood!
+
+  # For the moment, these legacy variances are few enough that
+  #  we can just handle them here with regexps.
+  
+  if(     $tag =~ m/^[ix]-hakka\b(.*)/i) {push @em, "zh-hakka$1";
+  } elsif($tag =~ m/^zh-hakka\b(.*)/i) {  push @em, "x-hakka$1", "i-hakka$1";
+
+  } elsif($tag =~ m/^he\b(.*)/i) { push @em, "iw$1";
+  } elsif($tag =~ m/^iw\b(.*)/i) { push @em, "he$1";
+
+  } elsif($tag =~ m/^in\b(.*)/i) { push @em, "id$1";
+  } elsif($tag =~ m/^id\b(.*)/i) { push @em, "in$1";
+
+  } elsif($tag =~ m/^[ix]-lux\b(.*)/i) { push @em, "lb$1";
+  } elsif($tag =~ m/^lb\b(.*)/i) {       push @em, "i-lux$1", "x-lux$1";
+
+  } elsif($tag =~ m/^[ix]-navajo\b(.*)/i) { push @em, "nv$1";
+  } elsif($tag =~ m/^nv\b(.*)/i) {          push @em, "i-navajo$1", "x-navajo$1";
+
+  } elsif($tag =~ m/^yi\b(.*)/i) { push @em, "ji$1";
+  } elsif($tag =~ m/^ji\b(.*)/i) { push @em, "yi$1";
+
+  } elsif($tag =~ m/^nb\b(.*)/i) {     push @em, "no-bok$1";
+  } elsif($tag =~ m/^no-bok\b(.*)/i) { push @em, "nb$1";
+  
+  } elsif($tag =~ m/^nn\b(.*)/i) {     push @em, "no-nyn$1";
+  } elsif($tag =~ m/^no-nyn\b(.*)/i) { push @em, "nn$1";
+  }
+
+  push @em, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;
+  return @em;
+}
+
+###########################################################################
+
+{
+  # Init %Panic...
+  
+  my @panic = (  # MUST all be lowercase!
+   # Only large ("national") languages make it in this list.
+   #  If you, as a user, are so bizarre that the /only/ language
+   #  you claim to accept is Galician, then no, we won't do you
+   #  the favor of providing Catalan as a panic-fallback for
+   #  you.  Because if I start trying to add "little languages" in
+   #  here, I'll just go crazy.
+
+   # Scandinavian lgs.  All based on opinion and hearsay.
+   'sv' => [qw(nb no da nn)],
+   'da' => [qw(nb no sv nn)], # I guess
+   [qw(no nn nb)], [qw(no nn nb sv da)],
+   'is' => [qw(da sv no nb nn)],
+   'fo' => [qw(da is no nb nn sv)], # I guess
+   
+   # I think this is about the extent of tolerable intelligibility
+   #  among large modern Romance languages.
+   'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
+   'ca' => [qw(es pt it fr)],
+   'es' => [qw(ca it fr pt)],
+   'it' => [qw(es fr ca pt)],
+   'fr' => [qw(es it ca pt)],
+   
+   # Also assume that speakers of the main Indian languages prefer
+   #  to read/hear Hindi over English
+   [qw(
+     as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
+   )] => 'hi',
+    # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
+    # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
+    # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
+   'hi' => [qw(bn pa as or)],
+   # I welcome finer data for the other Indian languages.
+   #  E.g., what should Oriya's list be, besides just Hindi?
+   
+   # And the panic languages for English is, of course, nil!
+
+   # My guesses at Slavic intelligibility:
+   ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukranian
+   ([qw(sr hr bs)]) x 2,  # Serbian, Croatian, Bosnian
+   'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak
+
+   'ms' => 'id', 'id' => 'ms', # Malay + Indonesian
+
+   'et' => 'fi', 'fi' => 'et', # Estonian + Finnish
+
+   #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai
+
+  );
+  my($k,$v);
+  while(@panic) {
+    ($k,$v) = splice(@panic,0,2);
+    foreach my $k (ref($k) ? @$k : $k) {
+      foreach my $v (ref($v) ? @$v : $v) {
+        push @{$Panic{$k} ||= []}, $v unless $k eq $v;
+      }
+    }
+  }
+}
+
+=item * the function @langs = panic_languages(@accept_languages)
+
+This function takes a list of 0 or more language
+tags that constitute a given user's Accept-Language list, and
+returns a list of tags for I<other> (non-super)
+languages that are probably acceptable to the user, to be
+used I<if all else fails>.
+
+For example, if a user accepts only 'ca' (Catalan) and
+'es' (Spanish), and the documents/interfaces you have
+available are just in German, Italian, and Chinese, then
+the user will most likely want the Italian one (and not
+the Chinese or German one!), instead of getting
+nothing.  So C<panic_languages('ca', 'es')> returns
+a list containing 'it' (Italian).
+
+English ('en') is I<always> in the return list, but
+whether it's at the very end or not depends
+on the input languages.  This function works by consulting
+an internal table that stipulates what common
+languages are "close" to each other.
+
+A useful construct you might consider using is:
+
+  @fallbacks = super_languages(@accept_languages);
+  push @fallbacks, panic_languages(
+    @accept_languages, @fallbacks,
+  );
+
+=cut
+
+sub panic_languages {
+  # When in panic or in doubt, run in circles, scream, and shout!
+  my(@out, %seen);
+  foreach my $t (@_) {
+    next unless $t;
+    next if $seen{$t}++; # so we don't return it or hit it again
+    # push @out, super_languages($t); # nah, keep that separate
+    push @out, @{ $Panic{lc $t} || next };
+  }
+  return grep !$seen{$_}++,  @out, 'en';
+}
+
+#---------------------------------------------------------------------------
+#---------------------------------------------------------------------------
+
+=item * the function implicate_supers( ...languages... )
+
+This takes a list of strings (which are presumed to be language-tags;
+strings that aren't, are ignored); and after each one, this function
+inserts super-ordinate forms that don't already appear in the list.
+The original list, plus these insertions, is returned.
+
+In other words, it takes this:
+
+  pt-br de-DE en-US fr pt-br-janeiro
+
+and returns this:
+
+  pt-br pt de-DE de en-US en fr pt-br-janeiro
+
+This function is most useful in the idiom
+
+  implicate_supers( I18N::LangTags::Detect::detect() );
+
+(See L<I18N::LangTags::Detect>.)
+
+
+=item * the function implicate_supers_strictly( ...languages... )
+
+This works like C<implicate_supers> except that the implicated
+forms are added to the end of the return list.
+
+In other words, implicate_supers_strictly takes a list of strings
+(which are presumed to be language-tags; strings that aren't, are
+ignored) and after the whole given list, it inserts the super-ordinate forms 
+of all given tags, minus any tags that already appear in the input list.
+
+In other words, it takes this:
+
+  pt-br de-DE en-US fr pt-br-janeiro
+
+and returns this:
+
+  pt-br de-DE en-US fr pt-br-janeiro pt de en
+
+The reason this function has "_strictly" in its name is that when
+you're processing an Accept-Language list according to the RFCs, if
+you interpret the RFCs quite strictly, then you would use
+implicate_supers_strictly, but for normal use (i.e., common-sense use,
+as far as I'm concerned) you'd use implicate_supers.
+
+=cut
+
+sub implicate_supers {
+  my @languages = grep is_language_tag($_), @_;
+  my %seen_encoded;
+  foreach my $lang (@languages) {
+    $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1
+  }
+
+  my(@output_languages);
+  foreach my $lang (@languages) {
+    push @output_languages, $lang;
+    foreach my $s ( I18N::LangTags::super_languages($lang) ) {
+      # Note that super_languages returns the longest first.
+      last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) };
+      push @output_languages, $s;
+    }
+  }
+  return uniq( @output_languages );
+
+}
+
+sub implicate_supers_strictly {
+  my @tags = grep is_language_tag($_), @_;
+  return uniq( @_,   map super_languages($_), @_ );
+}
+
+
+
+###########################################################################
+1;
+__END__
+
+=back
+
+=head1 ABOUT LOWERCASING
+
+I've considered making all the above functions that output language
+tags return all those tags strictly in lowercase.  Having all your
+language tags in lowercase does make some things easier.  But you
+might as well just lowercase as you like, or call
+C<encode_language_tag($lang1)> where appropriate.
+
+=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS
+
+In some future version of I18N::LangTags, I plan to include support
+for RFC2482-style language tags -- which are basically just normal
+language tags with their ASCII characters shifted into Plane 14.
+
+=head1 SEE ALSO
+
+* L<I18N::LangTags::List|I18N::LangTags::List>
+
+* RFC 3066, C<L<http://www.ietf.org/rfc/rfc3066.txt>>, "Tags for the
+Identification of Languages".  (Obsoletes RFC 1766)
+
+* RFC 2277, C<L<http://www.ietf.org/rfc/rfc2277.txt>>, "IETF Policy on
+Character Sets and Languages".
+
+* RFC 2231, C<L<http://www.ietf.org/rfc/rfc2231.txt>>, "MIME Parameter
+Value and Encoded Word Extensions: Character Sets, Languages, and
+Continuations".
+
+* RFC 2482, C<L<http://www.ietf.org/rfc/rfc2482.txt>>,
+"Language Tagging in Unicode Plain Text".
+
+* Locale::Codes, in
+C<L<http://www.perl.com/CPAN/modules/by-module/Locale/>>
+
+* ISO 639-2, "Codes for the representation of names of languages",
+including two-letter and three-letter codes,
+C<L<http://www.loc.gov/standards/iso639-2/php/code_list.php>>
+
+* The IANA list of registered languages (hopefully up-to-date),
+C<L<http://www.iana.org/assignments/language-tags>>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1998+ Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+The programs and documentation in this dist are distributed in
+the hope that they will be useful, but without any warranty; without
+even the implied warranty of merchantability or fitness for a
+particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
--- a/database/perl/lib/I18N/LangTags/Detect.pm
+++ b/database/perl/lib/I18N/LangTags/Detect.pm
@@ -0,0 +1,242 @@
+
+# Time-stamp: "2004-06-20 21:47:55 ADT"
+
+require 5;
+package I18N::LangTags::Detect;
+use strict;
+
+our ( $MATCH_SUPERS, $USING_LANGUAGE_TAGS,
+             $USE_LITERALS, $MATCH_SUPERS_TIGHTLY);
+
+BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
+ # define the constant 'DEBUG' at compile-time
+
+our $VERSION = "1.08";
+our @ISA = ();
+use I18N::LangTags qw(alternate_language_tags locale2language_tag);
+
+sub _uniq { my %seen; return grep(!($seen{$_}++), @_); }
+sub _normalize {
+  my(@languages) =
+    map lc($_),
+    grep $_,
+    map {; $_, alternate_language_tags($_) } @_;
+  return _uniq(@languages) if wantarray;
+  return $languages[0];
+}
+
+#---------------------------------------------------------------------------
+# The extent of our functional interface:
+
+sub detect () { return __PACKAGE__->ambient_langprefs; }
+
+#===========================================================================
+
+sub ambient_langprefs { # always returns things untainted
+  my $base_class = $_[0];
+  
+  return $base_class->http_accept_langs
+   if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI
+       # it's off in its own routine because it's complicated
+
+  # Not running as a CGI: try to puzzle out from the environment
+  my @languages;
+
+  foreach my $envname (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) {
+    next unless $ENV{$envname};
+    DEBUG and print "Noting \$$envname: $ENV{$envname}\n";
+    push @languages,
+      map locale2language_tag($_),
+        # if it's a lg tag, fine, pass thru (untainted)
+        # if it's a locale ID, try converting to a lg tag (untainted),
+        # otherwise nix it.
+
+      split m/[,:]/,
+      $ENV{$envname}
+    ;
+    last; # first one wins
+  }
+  
+  if($ENV{'IGNORE_WIN32_LOCALE'}) {
+    # no-op
+  } elsif(&_try_use('Win32::Locale')) {
+    # If we have that module installed...
+    push @languages, Win32::Locale::get_language() || ''
+     if defined &Win32::Locale::get_language;
+  }
+  return _normalize @languages;
+}
+
+#---------------------------------------------------------------------------
+
+sub http_accept_langs {
+  # Deal with HTTP "Accept-Language:" stuff.  Hassle.
+  # This code is more lenient than RFC 3282, which you must read.
+  # Hm.  Should I just move this into I18N::LangTags at some point?
+  no integer;
+
+  my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
+  # (always ends up untainting)
+
+  return() unless defined $in and length $in;
+
+  $in =~ s/\([^\)]*\)//g; # nix just about any comment
+  
+  if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
+    # Very common case: just one language tag
+    return _normalize $1;
+  } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
+    # Common case these days: just "foo, bar, baz"
+    return _normalize( $in =~ m/([a-zA-Z][-a-zA-Z]+)/g );
+  }
+
+  # Else it's complicated...
+
+  $in =~ s/\s+//g;  # Yes, we can just do without the WS!
+  my @in = $in =~ m/([^,]+)/g;
+  my %pref;
+  
+  my $q;
+  foreach my $tag (@in) {
+    next unless $tag =~
+     m/^([a-zA-Z][-a-zA-Z]+)
+        (?:
+         ;q=
+         (
+          \d*   # a bit too broad of a RE, but so what.
+          (?:
+            \.\d+
+          )?
+         )
+        )?
+       $
+      /sx
+    ;
+    $q = (defined $2 and length $2) ? $2 : 1;
+    #print "$1 with q=$q\n";
+    push @{ $pref{$q} }, lc $1;
+  }
+
+  return _normalize(
+    # Read off %pref, in descending key order...
+    map @{$pref{$_}},
+    sort {$b <=> $a}
+    keys %pref
+  );
+}
+
+#===========================================================================
+
+my %tried = ();
+  # memoization of whether we've used this module, or found it unusable.
+
+sub _try_use {   # Basically a wrapper around "require Modulename"
+  # "Many men have tried..."  "They tried and failed?"  "They tried and died."
+  return $tried{$_[0]} if exists $tried{$_[0]};  # memoization
+
+  my $module = $_[0];   # ASSUME sane module name!
+  { no strict 'refs';
+    no warnings 'once';
+    return($tried{$module} = 1)
+     if %{$module . "::Lexicon"} or @{$module . "::ISA"};
+    # weird case: we never use'd it, but there it is!
+  }
+
+  print " About to use $module ...\n" if DEBUG;
+  {
+    local $SIG{'__DIE__'};
+    local @INC = @INC;
+    pop @INC if $INC[-1] eq '.';
+    eval "require $module"; # used to be "use $module", but no point in that.
+  }
+  if($@) {
+    print "Error using $module \: $@\n" if DEBUG > 1;
+    return $tried{$module} = 0;
+  } else {
+    print " OK, $module is used\n" if DEBUG;
+    return $tried{$module} = 1;
+  }
+}
+
+#---------------------------------------------------------------------------
+1;
+__END__
+
+
+=head1 NAME
+
+I18N::LangTags::Detect - detect the user's language preferences
+
+=head1 SYNOPSIS
+
+  use I18N::LangTags::Detect;
+  my @user_wants = I18N::LangTags::Detect::detect();
+
+=head1 DESCRIPTION
+
+It is a common problem to want to detect what language(s) the user would
+prefer output in.
+
+=head1 FUNCTIONS
+
+This module defines one public function,
+C<I18N::LangTags::Detect::detect()>.  This function is not exported
+(nor is even exportable), and it takes no parameters.
+
+In scalar context, the function returns the most preferred language
+tag (or undef if no preference was seen).
+
+In list context (which is usually what you want),
+the function returns a
+(possibly empty) list of language tags representing (best first) what
+languages the user apparently would accept output in.  You will
+probably want to pass the output of this through
+C<I18N::LangTags::implicate_supers_tightly(...)>
+or
+C<I18N::LangTags::implicate_supers(...)>, like so:
+
+  my @languages =
+    I18N::LangTags::implicate_supers_tightly(
+      I18N::LangTags::Detect::detect()
+    );
+
+
+=head1 ENVIRONMENT
+
+This module looks at several environment variables:
+REQUEST_METHOD, HTTP_ACCEPT_LANGUAGE,
+LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.
+
+It will also use the L<Win32::Locale> module, if it's installed
+and IGNORE_WIN32_LOCALE is not set to a true value in the
+environment.
+
+
+=head1 SEE ALSO
+
+L<I18N::LangTags>, L<Win32::Locale>, L<Locale::Maketext>.
+
+(This module's core code started out as a routine in Locale::Maketext;
+but I moved it here once I realized it was more generally useful.)
+
+
+=head1 COPYRIGHT
+
+Copyright (c) 1998-2004 Sean M. Burke. All rights reserved.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+The programs and documentation in this dist are distributed in
+the hope that they will be useful, but without any warranty; without
+even the implied warranty of merchantability or fitness for a
+particular purpose.
+
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
+# a tip: Put a bit of chopped up pickled ginger in your salad. It's tasty!
--- a/database/perl/lib/I18N/LangTags/List.pm
+++ b/database/perl/lib/I18N/LangTags/List.pm
--- a/database/perl/lib/I18N/Langinfo.pm
+++ b/database/perl/lib/I18N/Langinfo.pm
@@ -0,0 +1,291 @@
+package I18N::Langinfo;
+
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+
+require Exporter;
+require XSLoader;
+
+our @ISA = qw(Exporter);
+
+our @EXPORT = qw(langinfo);
+
+our @EXPORT_OK = qw(
+	ABDAY_1
+	ABDAY_2
+	ABDAY_3
+	ABDAY_4
+	ABDAY_5
+	ABDAY_6
+	ABDAY_7
+	ABMON_1
+	ABMON_10
+	ABMON_11
+	ABMON_12
+	ABMON_2
+	ABMON_3
+	ABMON_4
+	ABMON_5
+	ABMON_6
+	ABMON_7
+	ABMON_8
+	ABMON_9
+	ALT_DIGITS
+	AM_STR
+	CODESET
+	CRNCYSTR
+	DAY_1
+	DAY_2
+	DAY_3
+	DAY_4
+	DAY_5
+	DAY_6
+	DAY_7
+	D_FMT
+	D_T_FMT
+	ERA
+	ERA_D_FMT
+	ERA_D_T_FMT
+	ERA_T_FMT
+	MON_1
+	MON_10
+	MON_11
+	MON_12
+	MON_2
+	MON_3
+	MON_4
+	MON_5
+	MON_6
+	MON_7
+	MON_8
+	MON_9
+	NOEXPR
+	NOSTR
+	PM_STR
+	RADIXCHAR
+	THOUSEP
+	T_FMT
+	T_FMT_AMPM
+	YESEXPR
+	YESSTR
+);
+
+our $VERSION = '0.19';
+
+XSLoader::load();
+
+1;
+__END__
+
+=head1 NAME
+
+I18N::Langinfo - query locale information
+
+=head1 SYNOPSIS
+
+  use I18N::Langinfo;
+
+=head1 DESCRIPTION
+
+The langinfo() function queries various locale information that can be
+used to localize output and user interfaces.  It uses the current underlying
+locale, regardless of whether or not it was called from within the scope of
+S<C<use locale>>.  The langinfo() function requires
+one numeric argument that identifies the locale constant to query:
+if no argument is supplied, C<$_> is used.  The numeric constants
+appropriate to be used as arguments are exportable from I18N::Langinfo.
+
+The following example will import the langinfo() function itself and
+three constants to be used as arguments to langinfo(): a constant for
+the abbreviated first day of the week (the numbering starts from
+Sunday = 1) and two more constants for the affirmative and negative
+answers for a yes/no question in the current locale.
+
+    use I18N::Langinfo qw(langinfo ABDAY_1 YESSTR NOSTR);
+
+    my ($abday_1, $yesstr, $nostr) =
+        map { langinfo($_) } (ABDAY_1, YESSTR, NOSTR);
+
+    print "$abday_1? [$yesstr/$nostr] ";
+
+In other words, in the "C" (or English) locale the above will probably
+print something like:
+
+    Sun? [yes/no]
+
+but under a French locale
+
+    dim? [oui/non]
+
+The usually available constants are as follows.
+
+=over 4
+
+=item *
+
+For abbreviated and full length days of the week and months of the year:
+
+    ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
+    ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
+    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
+    DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
+    MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
+    MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
+
+=item *
+
+For the date-time, date, and time formats used by the strftime() function
+(see L<POSIX>):
+
+    D_T_FMT D_FMT T_FMT
+
+=item *
+
+For the locales for which it makes sense to have ante meridiem and post
+meridiem time formats:
+
+    AM_STR PM_STR T_FMT_AMPM
+
+=item *
+
+For the character code set being used (such as "ISO8859-1", "cp850",
+"koi8-r", "sjis", "utf8", etc.), and for the currency string:
+
+    CODESET CRNCYSTR
+
+=item *
+
+For an alternate representation of digits, for the
+radix character used between the integer and the fractional part
+of decimal numbers, the group separator string for large-ish floating point
+numbers (yes, the final two are redundant with
+L<POSIX::localeconv()|POSIX/localeconv>):
+
+    ALT_DIGITS RADIXCHAR THOUSEP
+
+=item *
+
+For the affirmative and negative responses and expressions:
+
+    YESSTR YESEXPR NOSTR NOEXPR
+
+=item *
+
+For the eras based on typically some ruler, such as the Japanese Emperor
+(naturally only defined in the appropriate locales):
+
+    ERA ERA_D_FMT ERA_D_T_FMT ERA_T_FMT
+
+=back
+
+=head2 For systems without C<nl_langinfo>
+
+Starting in Perl 5.28, this module is available even on systems that lack a
+native C<nl_langinfo>.  On such systems, it uses various methods to construct
+what that function, if present, would return.  But there are potential
+glitches.  These are the items that could be different:
+
+=over
+
+=item C<ERA>
+
+Unimplemented, so returns C<"">.
+
+=item C<CODESET>
+
+Unimplemented, except on Windows, due to the vagaries of vendor locale names,
+returning C<""> on non-Windows.
+
+=item C<YESEXPR>
+
+=item C<YESSTR>
+
+=item C<NOEXPR>
+
+=item C<NOSTR>
+
+Only the values for English are returned.  C<YESSTR> and C<NOSTR> have been
+removed from POSIX 2008, and are retained here for backwards compatibility.
+Your platform's C<nl_langinfo> may not support them.
+
+=item C<D_FMT>
+
+Always evaluates to C<%x>, the locale's appropriate date representation.
+
+=item C<T_FMT>
+
+Always evaluates to C<%X>, the locale's appropriate time representation.
+
+=item C<D_T_FMT>
+
+Always evaluates to C<%c>, the locale's appropriate date and time
+representation.
+
+=item C<CRNCYSTR>
+
+The return may be incorrect for those rare locales where the currency symbol
+replaces the radix character.
+Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
+to work differently.
+
+=item C<ALT_DIGITS>
+
+Currently this gives the same results as Linux does.
+Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
+to work differently.
+
+=item C<ERA_D_FMT>
+
+=item C<ERA_T_FMT>
+
+=item C<ERA_D_T_FMT>
+
+=item C<T_FMT_AMPM>
+
+These are derived by using C<strftime()>, and not all versions of that function
+know about them.  C<""> is returned for these on such systems.
+
+=back
+
+See your L<nl_langinfo(3)> for more information about the available
+constants.  (Often this means having to look directly at the
+F<langinfo.h> C header file.)
+
+=head2 EXPORT
+
+By default only the C<langinfo()> function is exported.
+
+=head1 BUGS
+
+Before Perl 5.28, the returned values are unreliable for the C<RADIXCHAR> and
+C<THOUSEP> locale constants.
+
+Starting in 5.28, changing locales on threaded builds is supported on systems
+that offer thread-safe locale functions.  These include POSIX 2008 systems and
+Windows starting with Visual Studio 2005, and this module will work properly
+in such situations.  However, on threaded builds on Windows prior to Visual
+Studio 2015, retrieving the items C<CRNCYSTR> and C<THOUSEP> can result in a
+race with a thread that has converted to use the global locale.  It is quite
+uncommon for a thread to have done this.  It would be possible to construct a
+workaround for this; patches welcome: see L<perlapi/switch_to_global_locale>.
+
+=head1 SEE ALSO
+
+L<perllocale>, L<POSIX/localeconv>, L<POSIX/setlocale>, L<nl_langinfo(3)>.
+
+The langinfo() function is just a wrapper for the C nl_langinfo() interface.
+
+=head1 AUTHOR
+
+Jarkko Hietaniemi, E<lt>jhi@hut.fiE<gt>.  Now maintained by Perl 5 porters.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2001 by Jarkko Hietaniemi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut