Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/lib/Encode/Alias.pm
+++ b/database/perl/lib/Encode/Alias.pm
@@ -0,0 +1,395 @@
+package Encode::Alias;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
+
+use Exporter 'import';
+
+# Public, encouraged API is exported by default
+
+our @EXPORT =
+  qw (
+  define_alias
+  find_alias
+);
+
+our @Alias;    # ordered matching list
+our %Alias;    # cached known aliases
+
+sub find_alias {
+    my $class = shift;
+    my $find  = shift;
+    unless ( exists $Alias{$find} ) {
+        $Alias{$find} = undef;    # Recursion guard
+        for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
+            my $alias = $Alias[$i];
+            my $val   = $Alias[ $i + 1 ];
+            my $new;
+            if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
+                DEBUG and warn "eval $val";
+                $new = eval $val;
+                DEBUG and $@ and warn "$val, $@";
+            }
+            elsif ( ref($alias) eq 'CODE' ) {
+                DEBUG and warn "$alias", "->", "($find)";
+                $new = $alias->($find);
+            }
+            elsif ( lc($find) eq lc($alias) ) {
+                $new = $val;
+            }
+            if ( defined($new) ) {
+                next if $new eq $find;    # avoid (direct) recursion on bugs
+                DEBUG and warn "$alias, $new";
+                my $enc =
+                  ( ref($new) ) ? $new : Encode::find_encoding($new);
+                if ($enc) {
+                    $Alias{$find} = $enc;
+                    last;
+                }
+            }
+        }
+
+        # case insensitive search when canonical is not in all lowercase
+        # RT ticket #7835
+        unless ( $Alias{$find} ) {
+            my $lcfind = lc($find);
+            for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
+            {
+                $lcfind eq lc($name) or next;
+                $Alias{$find} = Encode::find_encoding($name);
+                DEBUG and warn "$find => $name";
+            }
+        }
+    }
+    if (DEBUG) {
+        my $name;
+        if ( my $e = $Alias{$find} ) {
+            $name = $e->name;
+        }
+        else {
+            $name = "";
+        }
+        warn "find_alias($class, $find)->name = $name";
+    }
+    return $Alias{$find};
+}
+
+sub define_alias {
+    while (@_) {
+        my $alias = shift;
+        my $name = shift;
+        unshift( @Alias, $alias => $name )    # newer one has precedence
+            if defined $alias;
+        if ( ref($alias) ) {
+
+            # clear %Alias cache to allow overrides
+            my @a = keys %Alias;
+            for my $k (@a) {
+                if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
+                    DEBUG and warn "delete \$Alias\{$k\}";
+                    delete $Alias{$k};
+                }
+                elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
+                    DEBUG and warn "delete \$Alias\{$k\}";
+                    delete $Alias{$k};
+                }
+            }
+        }
+        elsif (defined $alias) {
+            DEBUG and warn "delete \$Alias\{$alias\}";
+            delete $Alias{$alias};
+        }
+        elsif (DEBUG) {
+            require Carp;
+            Carp::croak("undef \$alias");
+        }
+    }
+}
+
+# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias
+use Encode ();
+
+# Allow latin-1 style names as well
+# 0  1  2  3  4  5   6   7   8   9  10
+our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
+
+# Allow winlatin1 style names as well
+our %Winlatin2cp = (
+    'latin1'     => 1252,
+    'latin2'     => 1250,
+    'cyrillic'   => 1251,
+    'greek'      => 1253,
+    'turkish'    => 1254,
+    'hebrew'     => 1255,
+    'arabic'     => 1256,
+    'baltic'     => 1257,
+    'vietnamese' => 1258,
+);
+
+init_aliases();
+
+sub undef_aliases {
+    @Alias = ();
+    %Alias = ();
+}
+
+sub init_aliases {
+    undef_aliases();
+
+    # Try all-lower-case version should all else fails
+    define_alias( qr/^(.*)$/ => '"\L$1"' );
+
+    # UTF/UCS stuff
+    define_alias( qr/^(unicode-1-1-)?UTF-?7$/i     => '"UTF-7"' );
+    define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
+    define_alias(
+        qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
+        qr/^UCS-?4-?(BE|LE|)?$/i => 'uc("UTF-32$1")',
+        qr/^iso-10646-1$/i      => '"UCS-2BE"'
+    );
+    define_alias(
+        qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
+        qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
+        qr/^UTF-?(16|32)$/i     => '"UTF-$1"',
+    );
+
+    # ASCII
+    define_alias( qr/^(?:US-?)ascii$/i       => '"ascii"' );
+    define_alias( 'C'                        => 'ascii' );
+    define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
+
+    # Allow variants of iso-8859-1 etc.
+    define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
+
+    # At least HP-UX has these.
+    define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
+
+    # More HP stuff.
+    define_alias(
+        qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
+          '"${1}8"' );
+
+    # The Official name of ASCII.
+    define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
+
+    # This is a font issue, not an encoding issue.
+    # (The currency symbol of the Latin 1 upper half
+    #  has been redefined as the euro symbol.)
+    define_alias( qr/^(.+)\@euro$/i => '"$1"' );
+
+    define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
+'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
+    );
+
+    define_alias(
+        qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
+             hebrew|arabic|baltic|vietnamese)$/ix =>
+          '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
+    );
+
+    # Common names for non-latin preferred MIME names
+    define_alias(
+        'ascii'    => 'US-ascii',
+        'cyrillic' => 'iso-8859-5',
+        'arabic'   => 'iso-8859-6',
+        'greek'    => 'iso-8859-7',
+        'hebrew'   => 'iso-8859-8',
+        'thai'     => 'iso-8859-11',
+    );
+    # RT #20781
+    define_alias(qr/\btis-?620\b/i  => '"iso-8859-11"');
+
+    # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
+    # And Microsoft has their own naming (again, surprisingly).
+    # And windows-* is registered in IANA!
+    define_alias(
+        qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
+
+    # Sometimes seen with a leading zero.
+    # define_alias( qr/\bcp037\b/i => '"cp37"');
+
+    # Mac Mappings
+    # predefined in *.ucm; unneeded
+    # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
+    define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );
+    # http://rt.cpan.org/Ticket/Display.html?id=36326
+    define_alias( qr/^macintosh$/i => '"MacRoman"' );
+    # https://rt.cpan.org/Ticket/Display.html?id=78125
+    define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );
+    # Ououououou. gone.  They are different!
+    # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
+
+    # Standardize on the dashed versions.
+    define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
+
+    unless ($Encode::ON_EBCDIC) {
+
+        # for Encode::CN
+        define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
+        define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
+
+        # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
+        # CP936 doesn't have vendor-addon for GBK, so they're identical.
+        define_alias( qr/^gbk$/i => '"cp936"' );
+
+        # This fixes gb2312 vs. euc-cn confusion, practically
+        define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
+
+        # for Encode::JP
+        define_alias( qr/\bjis$/i         => '"7bit-jis"' );
+        define_alias( qr/\beuc.*jp$/i     => '"euc-jp"' );
+        define_alias( qr/\bjp.*euc$/i     => '"euc-jp"' );
+        define_alias( qr/\bujis$/i        => '"euc-jp"' );
+        define_alias( qr/\bshift.*jis$/i  => '"shiftjis"' );
+        define_alias( qr/\bsjis$/i        => '"shiftjis"' );
+        define_alias( qr/\bwindows-31j$/i => '"cp932"' );
+
+        # for Encode::KR
+        define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
+        define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
+
+        # This fixes ksc5601 vs. euc-kr confusion, practically
+        define_alias( qr/(?:x-)?uhc$/i         => '"cp949"' );
+        define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
+        define_alias( qr/\bks_c_5601-1987$/i   => '"cp949"' );
+
+        # for Encode::TW
+        define_alias( qr/\bbig-?5$/i              => '"big5-eten"' );
+        define_alias( qr/\bbig5-?et(?:en)?$/i     => '"big5-eten"' );
+        define_alias( qr/\btca[-_]?big5$/i        => '"big5-eten"' );
+        define_alias( qr/\bbig5-?hk(?:scs)?$/i    => '"big5-hkscs"' );
+        define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
+    }
+
+    # https://github.com/dankogai/p5-encode/issues/37
+    define_alias(qr/cp65000/i => '"UTF-7"');
+    define_alias(qr/cp65001/i => '"utf-8-strict"');
+
+    # utf8 is blessed :)
+    define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
+
+    # At last, Map white space and _ to '-'
+    define_alias( qr/^([^\s_]+)[\s_]+([^\s_]*)$/i => '"$1-$2"' );
+}
+
+1;
+__END__
+
+# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
+# TODO: HP-UX '15' encodings japanese15 korean15 roi15
+# TODO: Cyrillic encoding ISO-IR-111 (useful?)
+# TODO: Armenian encoding ARMSCII-8
+# TODO: Hebrew encoding ISO-8859-8-1
+# TODO: Thai encoding TCVN
+# TODO: Vietnamese encodings VPS
+# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
+#       ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
+#       Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
+#       Kannada Khmer Korean Laotian Malayalam Mongolian
+#       Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
+
+=head1 NAME
+
+Encode::Alias - alias definitions to encodings
+
+=head1 SYNOPSIS
+
+  use Encode;
+  use Encode::Alias;
+  define_alias( "newName" => ENCODING);
+  define_alias( qr/.../ => ENCODING);
+  define_alias( sub { return ENCODING if ...; } );
+
+=head1 DESCRIPTION
+
+Allows newName to be used as an alias for ENCODING. ENCODING may be
+either the name of an encoding or an encoding object (as described 
+in L<Encode>).
+
+Currently the first argument to define_alias() can be specified in the
+following ways:
+
+=over 4
+
+=item As a simple string.
+
+=item As a qr// compiled regular expression, e.g.:
+
+  define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
+
+In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
+in order to allow C<$1> etc. to be substituted.  The example is one
+way to alias names as used in X11 fonts to the MIME names for the
+iso-8859-* family.  Note the double quotes inside the single quotes.
+
+(or, you don't have to do this yourself because this example is predefined)
+
+If you are using a regex here, you have to use the quotes as shown or
+it won't work.  Also note that regex handling is tricky even for the
+experienced.  Use this feature with caution.
+
+=item As a code reference, e.g.:
+
+  define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
+
+The same effect as the example above in a different way.  The coderef
+takes the alias name as an argument and returns a canonical name on
+success or undef if not.  Note the second argument is ignored if provided.
+Use this with even more caution than the regex version.
+
+=back
+
+=head3 Changes in code reference aliasing
+
+As of Encode 1.87, the older form
+
+  define_alias( sub { return  /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
+
+no longer works. 
+
+Encode up to 1.86 internally used "local $_" to implement this older
+form.  But consider the code below;
+
+  use Encode;
+  $_ = "eeeee" ;
+  while (/(e)/g) {
+    my $utf = decode('aliased-encoding-name', $1);
+    print "position:",pos,"\n";
+  }
+
+Prior to Encode 1.86 this fails because of "local $_".
+
+=head2 Alias overloading
+
+You can override predefined aliases by simply applying define_alias().
+The new alias is always evaluated first, and when necessary,
+define_alias() flushes the internal cache to make the new definition
+available.
+
+  # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
+  # superset of SHIFT_JIS
+
+  define_alias( qr/shift.*jis$/i  => '"cp932"' );
+  define_alias( qr/sjis$/i        => '"cp932"' );
+
+If you want to zap all predefined aliases, you can use
+
+  Encode::Alias->undef_aliases;
+
+to do so.  And
+
+  Encode::Alias->init_aliases;
+
+gets the factory settings back.
+
+Note that define_alias() will not be able to override the canonical name
+of encodings. Encodings are first looked up by canonical name before
+potential aliases are tried.
+
+=head1 SEE ALSO
+
+L<Encode>, L<Encode::Supported>
+
+=cut
+
--- a/database/perl/lib/Encode/Byte.pm
+++ b/database/perl/lib/Encode/Byte.pm
@@ -0,0 +1,120 @@
+package Encode::Byte;
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Byte - Single Byte Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $greek = encode("iso-8859-7", $utf8);  # loads Encode::Byte implicitly
+    $utf8  = decode("iso-8859-7", $greek); # ditto
+
+=head1 ABSTRACT
+
+This module implements various single byte encodings.  For most cases it uses
+\x80-\xff (upper half) to map non-ASCII characters.  Encodings
+supported are as follows.   
+
+  Canonical      Alias		                      Description
+  --------------------------------------------------------------------
+  # ISO 8859 series
+  (iso-8859-1	is in built-in)
+  iso-8859-2	latin2					     [ISO]
+  iso-8859-3	latin3					     [ISO]
+  iso-8859-4	latin4					     [ISO]
+  iso-8859-5						     [ISO]
+  iso-8859-6						     [ISO]
+  iso-8859-7						     [ISO]
+  iso-8859-8						     [ISO]
+  iso-8859-9	latin5					     [ISO]
+  iso-8859-10	latin6					     [ISO]
+  iso-8859-11
+  (iso-8859-12 is nonexistent)
+  iso-8859-13   latin7					     [ISO]
+  iso-8859-14	latin8					     [ISO]
+  iso-8859-15	latin9					     [ISO]
+  iso-8859-16	latin10					     [ISO]
+
+  # Cyrillic
+  koi8-f					
+  koi8-r        cp878					 [RFC1489]
+  koi8-u						 [RFC2319]
+
+  # Vietnamese
+  viscii
+
+  # all cp* are also available as ibm-*, ms-*, and windows-*
+  # also see L<http://msdn.microsoft.com/en-us/library/aa752010%28VS.85%29.aspx>
+
+  cp424  
+  cp437  
+  cp737  
+  cp775  
+  cp850  
+  cp852  
+  cp855  
+  cp856  
+  cp857  
+  cp860  
+  cp861  
+  cp862  
+  cp863  
+  cp864  
+  cp865  
+  cp866  
+  cp869  
+  cp874  
+  cp1006  
+  cp1250	WinLatin2
+  cp1251	WinCyrillic
+  cp1252	WinLatin1
+  cp1253	WinGreek
+  cp1254	WinTurkish
+  cp1255	WinHebrew
+  cp1256	WinArabic
+  cp1257	WinBaltic
+  cp1258	WinVietnamese
+
+  # Macintosh
+  # Also see L<http://developer.apple.com/technotes/tn/tn1150.html>
+  MacArabic  
+  MacCentralEurRoman  
+  MacCroatian  
+  MacCyrillic  
+  MacFarsi  
+  MacGreek  
+  MacHebrew  
+  MacIcelandic  
+  MacRoman  
+  MacRomanian  
+  MacRumanian  
+  MacSami  
+  MacThai  
+  MacTurkish  
+  MacUkrainian  
+
+  # More vendor encodings
+  AdobeStandardEncoding
+  nextstep
+  hp-roman8
+
+=head1 DESCRIPTION
+
+To find how to use this module in detail, see L<Encode>.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/CJKConstants.pm
+++ b/database/perl/lib/Encode/CJKConstants.pm
@@ -0,0 +1,66 @@
+#
+# $Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $
+#
+
+package Encode::CJKConstants;
+
+use strict;
+use warnings;
+our $RCSID = q$Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $;
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Carp;
+
+require Exporter;
+our @ISA         = qw(Exporter);
+our @EXPORT      = qw();
+our @EXPORT_OK   = qw(%CHARCODE %ESC %RE);
+our %EXPORT_TAGS = ( 'all' => [ @EXPORT_OK, @EXPORT ] );
+
+my %_0208 = (
+    1978 => '\e\$\@',
+    1983 => '\e\$B',
+    1990 => '\e&\@\e\$B',
+);
+
+our %CHARCODE = (
+    UNDEF_EUC     => "\xa2\xae",    # <20><> in EUC
+    UNDEF_SJIS    => "\x81\xac",    # <20><> in SJIS
+    UNDEF_JIS     => "\xa2\xf7",    # <20><> -- used in unicode
+    UNDEF_UNICODE => "\x20\x20",    # <20><> -- used in unicode
+);
+
+our %ESC = (
+    GB_2312   => "\e\$A",
+    JIS_0208  => "\e\$B",
+    JIS_0212  => "\e\$(D",
+    KSC_5601  => "\e\$(C",
+    ASC       => "\e\(B",
+    KANA      => "\e\(I",
+    '2022_KR' => "\e\$)C",
+);
+
+our %RE = (
+    ASCII     => '[\x00-\x7f]',
+    BIN       => '[\x00-\x06\x7f\xff]',
+    EUC_0212  => '\x8f[\xa1-\xfe][\xa1-\xfe]',
+    EUC_C     => '[\xa1-\xfe][\xa1-\xfe]',
+    EUC_KANA  => '\x8e[\xa1-\xdf]',
+    JIS_0208  => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
+    JIS_0212  => "\e" . '\$\(D',
+    ISO_ASC   => "\e" . '\([BJ]',
+    JIS_KANA  => "\e" . '\(I',
+    '2022_KR' => "\e" . '\$\)C',
+    SJIS_C    => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
+    SJIS_KANA => '[\xa1-\xdf]',
+    UTF8      => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
+);
+
+1;
+
+=head1 NAME
+
+Encode::CJKConstants.pm -- Internally used by Encode::??::ISO_2022_*
+
+=cut
+
--- a/database/perl/lib/Encode/CN.pm
+++ b/database/perl/lib/Encode/CN.pm
@@ -0,0 +1,74 @@
+package Encode::CN;
+BEGIN {
+    if ( ord("A") == 193 ) {
+        die "Encode::CN not supported on EBCDIC\n";
+    }
+}
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+# Relocated from Encode.pm
+
+use Encode::CN::HZ;
+
+# use Encode::CN::2022_CN;
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::CN - China-based Chinese Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $euc_cn = encode("euc-cn", $utf8);   # loads Encode::CN implicitly
+    $utf8   = decode("euc-cn", $euc_cn); # ditto
+
+=head1 DESCRIPTION
+
+This module implements China-based Chinese charset encodings.
+Encodings supported are as follows.
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  euc-cn      /\beuc.*cn$/i	EUC (Extended Unix Character)
+          /\bcn.*euc$/i
+              /\bGB[-_ ]?2312(?:\D.*$|$)/i (see below)
+  gb2312-raw			The raw (low-bit) GB2312 character map
+  gb12345-raw			Traditional chinese counterpart to 
+                GB2312 (raw)
+  iso-ir-165			GB2312 + GB6345 + GB8565 + additions
+  MacChineseSimp                GB2312 + Apple Additions
+  cp936				Code Page 936, also known as GBK 
+                (Extended GuoBiao)
+  hz				7-bit escaped GB2312 encoding
+  --------------------------------------------------------------------
+
+To find how to use this module in detail, see L<Encode>.
+
+=head1 NOTES
+
+Due to size concerns, C<GB 18030> (an extension to C<GBK>) is distributed
+separately on CPAN, under the name L<Encode::HanExtra>. That module
+also contains extra Taiwan-based encodings.
+
+=head1 BUGS
+
+When you see C<charset=gb2312> on mails and web pages, they really
+mean C<euc-cn> encodings.  To fix that, C<gb2312> is aliased to C<euc-cn>.
+Use C<gb2312-raw> when you really mean it.
+
+The ASCII region (0x00-0x7f) is preserved for all encodings, even though
+this conflicts with mappings by the Unicode Consortium.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/CN/HZ.pm
+++ b/database/perl/lib/Encode/CN/HZ.pm
@@ -0,0 +1,201 @@
+package Encode::CN::HZ;
+
+use strict;
+use warnings;
+use utf8 ();
+
+use vars qw($VERSION);
+$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Encode qw(:fallbacks);
+
+use parent qw(Encode::Encoding);
+__PACKAGE__->Define('hz');
+
+# HZ is a combination of ASCII and escaped GB, so we implement it
+# with the GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.
+
+# not ported for EBCDIC.  Which should be used, "~" or "\x7E"?
+
+sub needs_lines { 1 }
+
+sub decode ($$;$) {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+
+    my $GB  = Encode::find_encoding('gb2312-raw');
+    my $ret = substr($str, 0, 0); # to propagate taintedness
+    my $in_ascii = 1;    # default mode is ASCII.
+
+    while ( length $str ) {
+        if ($in_ascii) {    # ASCII mode
+            if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) {    # no '~' => ASCII
+                $ret .= $1;
+
+                # EBCDIC should need ascii2native, but not ported.
+            }
+            elsif ( $str =~ s/^\x7E\x7E// ) {           # escaped tilde
+                $ret .= '~';
+            }
+            elsif ( $str =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII
+                1;                              # no-op
+            }
+            elsif ( $str =~ s/^\x7E\x7B// ) {    # '~{'
+                $in_ascii = 0;                   # to GB
+            }
+            else {    # encounters an invalid escape, \x80 or greater
+                last;
+            }
+        }
+        else {        # GB mode; the byte ranges are as in RFC 1843.
+            no warnings 'uninitialized';
+            if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
+                my $prefix = $1;
+                $ret .= $GB->decode( $prefix, $chk );
+            }
+            elsif ( $str =~ s/^\x7E\x7D// ) {    # '~}'
+                $in_ascii = 1;
+            }
+            else {                               # invalid
+                last;
+            }
+        }
+    }
+    $_[1] = '' if $chk;    # needs_lines guarantees no partial character
+    return $ret;
+}
+
+sub cat_decode {
+    my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;
+    my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];
+
+    my $GB  = Encode::find_encoding('gb2312-raw');
+    my $ret = '';
+    my $in_ascii = 1;      # default mode is ASCII.
+
+    my $ini_pos = pos($$rsrc);
+
+    substr( $src, 0, $pos ) = '';
+
+    my $ini_len = bytes::length($src);
+
+    # $trm is the first of the pair '~~', then 2nd tilde is to be removed.
+    # XXX: Is better C<$src =~ s/^\x7E// or die if ...>?
+    $src =~ s/^\x7E// if $trm eq "\x7E";
+
+    while ( length $src ) {
+        my $now;
+        if ($in_ascii) {    # ASCII mode
+            if ( $src =~ s/^([\x00-\x7D\x7F])// ) {    # no '~' => ASCII
+                $now = $1;
+            }
+            elsif ( $src =~ s/^\x7E\x7E// ) {          # escaped tilde
+                $now = '~';
+            }
+            elsif ( $src =~ s/^\x7E\cJ// ) {    # '\cJ' == LF in ASCII
+                next;
+            }
+            elsif ( $src =~ s/^\x7E\x7B// ) {    # '~{'
+                $in_ascii = 0;                   # to GB
+                next;
+            }
+            else {    # encounters an invalid escape, \x80 or greater
+                last;
+            }
+        }
+        else {        # GB mode; the byte ranges are as in RFC 1843.
+            if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)// ) {
+                $now = $GB->decode( $1, $chk );
+            }
+            elsif ( $src =~ s/^\x7E\x7D// ) {    # '~}'
+                $in_ascii = 1;
+                next;
+            }
+            else {                               # invalid
+                last;
+            }
+        }
+
+        next if !defined $now;
+
+        $ret .= $now;
+
+        if ( $now eq $trm ) {
+            $$rdst .= $ret;
+            $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
+            pos($$rsrc) = $ini_pos;
+            return 1;
+        }
+    }
+
+    $$rdst .= $ret;
+    $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
+    pos($$rsrc) = $ini_pos;
+    return '';    # terminator not found
+}
+
+sub encode($$;$) {
+     my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+
+    my $GB  = Encode::find_encoding('gb2312-raw');
+    my $ret = substr($str, 0, 0); # to propagate taintedness;
+    my $in_ascii = 1;    # default mode is ASCII.
+
+    no warnings 'utf8';  # $str may be malformed UTF8 at the end of a chunk.
+
+    while ( length $str ) {
+        if ( $str =~ s/^([[:ascii:]]+)// ) {
+            my $tmp = $1;
+            $tmp =~ s/~/~~/g;    # escapes tildes
+            if ( !$in_ascii ) {
+                $ret .= "\x7E\x7D";    # '~}'
+                $in_ascii = 1;
+            }
+            $ret .= pack 'a*', $tmp;    # remove UTF8 flag.
+        }
+        elsif ( $str =~ s/(.)// ) {
+            my $s = $1;
+            my $tmp = $GB->encode( $s, $chk || 0 );
+            last if !defined $tmp;
+            if ( length $tmp == 2 ) {    # maybe a valid GB char (XXX)
+                if ($in_ascii) {
+                    $ret .= "\x7E\x7B";    # '~{'
+                    $in_ascii = 0;
+                }
+                $ret .= $tmp;
+            }
+            elsif ( length $tmp ) {        # maybe FALLBACK in ASCII (XXX)
+                if ( !$in_ascii ) {
+                    $ret .= "\x7E\x7D";    # '~}'
+                    $in_ascii = 1;
+                }
+                $ret .= $tmp;
+            }
+        }
+        else {    # if $str is malformed UTF8 *and* if length $str != 0.
+            last;
+        }
+    }
+    $_[1] = $str if $chk;
+
+    # The state at the end of the chunk is discarded, even if in GB mode.
+    # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
+    # Parhaps it is harmless, but further investigations may be required...
+
+    if ( !$in_ascii ) {
+        $ret .= "\x7E\x7D";    # '~}'
+        $in_ascii = 1;
+    }
+    utf8::encode($ret); # https://rt.cpan.org/Ticket/Display.html?id=35120
+    return $ret;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::CN::HZ -- internally used by Encode::CN
+
+=cut
--- a/database/perl/lib/Encode/Changes.e2x
+++ b/database/perl/lib/Encode/Changes.e2x
@@ -0,0 +1,7 @@
+#
+# $Id: Changes.e2x,v 2.0 2004/05/16 20:55:15 dankogai Exp $
+# Revision history for Perl extension Encode::$_Name_.
+#
+
+0.01  $_Now_
+  Autogenerated by enc2xs version $_Version_.
--- a/database/perl/lib/Encode/Config.pm
+++ b/database/perl/lib/Encode/Config.pm
@@ -0,0 +1,170 @@
+#
+# Demand-load module list
+#
+package Encode::Config;
+our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use strict;
+use warnings;
+
+our %ExtModule = (
+
+    # Encode::Byte
+    #iso-8859-1 is in Encode.pm itself
+    'iso-8859-2'            => 'Encode::Byte',
+    'iso-8859-3'            => 'Encode::Byte',
+    'iso-8859-4'            => 'Encode::Byte',
+    'iso-8859-5'            => 'Encode::Byte',
+    'iso-8859-6'            => 'Encode::Byte',
+    'iso-8859-7'            => 'Encode::Byte',
+    'iso-8859-8'            => 'Encode::Byte',
+    'iso-8859-9'            => 'Encode::Byte',
+    'iso-8859-10'           => 'Encode::Byte',
+    'iso-8859-11'           => 'Encode::Byte',
+    'iso-8859-13'           => 'Encode::Byte',
+    'iso-8859-14'           => 'Encode::Byte',
+    'iso-8859-15'           => 'Encode::Byte',
+    'iso-8859-16'           => 'Encode::Byte',
+    'koi8-f'                => 'Encode::Byte',
+    'koi8-r'                => 'Encode::Byte',
+    'koi8-u'                => 'Encode::Byte',
+    'viscii'                => 'Encode::Byte',
+    'cp424'                 => 'Encode::Byte',
+    'cp437'                 => 'Encode::Byte',
+    'cp737'                 => 'Encode::Byte',
+    'cp775'                 => 'Encode::Byte',
+    'cp850'                 => 'Encode::Byte',
+    'cp852'                 => 'Encode::Byte',
+    'cp855'                 => 'Encode::Byte',
+    'cp856'                 => 'Encode::Byte',
+    'cp857'                 => 'Encode::Byte',
+    'cp858'                 => 'Encode::Byte',
+    'cp860'                 => 'Encode::Byte',
+    'cp861'                 => 'Encode::Byte',
+    'cp862'                 => 'Encode::Byte',
+    'cp863'                 => 'Encode::Byte',
+    'cp864'                 => 'Encode::Byte',
+    'cp865'                 => 'Encode::Byte',
+    'cp866'                 => 'Encode::Byte',
+    'cp869'                 => 'Encode::Byte',
+    'cp874'                 => 'Encode::Byte',
+    'cp1006'                => 'Encode::Byte',
+    'cp1250'                => 'Encode::Byte',
+    'cp1251'                => 'Encode::Byte',
+    'cp1252'                => 'Encode::Byte',
+    'cp1253'                => 'Encode::Byte',
+    'cp1254'                => 'Encode::Byte',
+    'cp1255'                => 'Encode::Byte',
+    'cp1256'                => 'Encode::Byte',
+    'cp1257'                => 'Encode::Byte',
+    'cp1258'                => 'Encode::Byte',
+    'AdobeStandardEncoding' => 'Encode::Byte',
+    'MacArabic'             => 'Encode::Byte',
+    'MacCentralEurRoman'    => 'Encode::Byte',
+    'MacCroatian'           => 'Encode::Byte',
+    'MacCyrillic'           => 'Encode::Byte',
+    'MacFarsi'              => 'Encode::Byte',
+    'MacGreek'              => 'Encode::Byte',
+    'MacHebrew'             => 'Encode::Byte',
+    'MacIcelandic'          => 'Encode::Byte',
+    'MacRoman'              => 'Encode::Byte',
+    'MacRomanian'           => 'Encode::Byte',
+    'MacRumanian'           => 'Encode::Byte',
+    'MacSami'               => 'Encode::Byte',
+    'MacThai'               => 'Encode::Byte',
+    'MacTurkish'            => 'Encode::Byte',
+    'MacUkrainian'          => 'Encode::Byte',
+    'nextstep'              => 'Encode::Byte',
+    'hp-roman8'             => 'Encode::Byte',
+    #'gsm0338'               => 'Encode::Byte',
+    'gsm0338'               => 'Encode::GSM0338',
+
+    # Encode::EBCDIC
+    'cp37'     => 'Encode::EBCDIC',
+    'cp500'    => 'Encode::EBCDIC',
+    'cp875'    => 'Encode::EBCDIC',
+    'cp1026'   => 'Encode::EBCDIC',
+    'cp1047'   => 'Encode::EBCDIC',
+    'posix-bc' => 'Encode::EBCDIC',
+
+    # Encode::Symbol
+    'dingbats'      => 'Encode::Symbol',
+    'symbol'        => 'Encode::Symbol',
+    'AdobeSymbol'   => 'Encode::Symbol',
+    'AdobeZdingbat' => 'Encode::Symbol',
+    'MacDingbats'   => 'Encode::Symbol',
+    'MacSymbol'     => 'Encode::Symbol',
+
+    # Encode::Unicode
+    'UCS-2BE'  => 'Encode::Unicode',
+    'UCS-2LE'  => 'Encode::Unicode',
+    'UTF-16'   => 'Encode::Unicode',
+    'UTF-16BE' => 'Encode::Unicode',
+    'UTF-16LE' => 'Encode::Unicode',
+    'UTF-32'   => 'Encode::Unicode',
+    'UTF-32BE' => 'Encode::Unicode',
+    'UTF-32LE' => 'Encode::Unicode',
+    'UTF-7'    => 'Encode::Unicode::UTF7',
+);
+
+unless ( ord("A") == 193 ) {
+    %ExtModule = (
+        %ExtModule,
+        'euc-cn'         => 'Encode::CN',
+        'gb12345-raw'    => 'Encode::CN',
+        'gb2312-raw'     => 'Encode::CN',
+        'hz'             => 'Encode::CN',
+        'iso-ir-165'     => 'Encode::CN',
+        'cp936'          => 'Encode::CN',
+        'MacChineseSimp' => 'Encode::CN',
+
+        '7bit-jis'      => 'Encode::JP',
+        'euc-jp'        => 'Encode::JP',
+        'iso-2022-jp'   => 'Encode::JP',
+        'iso-2022-jp-1' => 'Encode::JP',
+        'jis0201-raw'   => 'Encode::JP',
+        'jis0208-raw'   => 'Encode::JP',
+        'jis0212-raw'   => 'Encode::JP',
+        'cp932'         => 'Encode::JP',
+        'MacJapanese'   => 'Encode::JP',
+        'shiftjis'      => 'Encode::JP',
+
+        'euc-kr'      => 'Encode::KR',
+        'iso-2022-kr' => 'Encode::KR',
+        'johab'       => 'Encode::KR',
+        'ksc5601-raw' => 'Encode::KR',
+        'cp949'       => 'Encode::KR',
+        'MacKorean'   => 'Encode::KR',
+
+        'big5-eten'      => 'Encode::TW',
+        'big5-hkscs'     => 'Encode::TW',
+        'cp950'          => 'Encode::TW',
+        'MacChineseTrad' => 'Encode::TW',
+
+        #'big5plus'           => 'Encode::HanExtra',
+        #'euc-tw'             => 'Encode::HanExtra',
+        #'gb18030'            => 'Encode::HanExtra',
+
+        'MIME-Header' => 'Encode::MIME::Header',
+        'MIME-B'      => 'Encode::MIME::Header',
+        'MIME-Q'      => 'Encode::MIME::Header',
+
+        'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
+    );
+}
+
+#
+# Why not export ? to keep ConfigLocal Happy!
+#
+while ( my ( $enc, $mod ) = each %ExtModule ) {
+    $Encode::ExtModule{$enc} = $mod;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Config -- internally used by Encode
+
+=cut
--- a/database/perl/lib/Encode/ConfigLocal_PM.e2x
+++ b/database/perl/lib/Encode/ConfigLocal_PM.e2x
@@ -0,0 +1,13 @@
+#
+# Local demand-load module list
+#
+# You should not edit this file by hand!  use "enc2xs -C"
+# 
+package Encode::ConfigLocal;
+our $VERSION = $_LocalVer_;
+
+use strict;
+
+$_ModLines_
+
+1;
--- a/database/perl/lib/Encode/EBCDIC.pm
+++ b/database/perl/lib/Encode/EBCDIC.pm
@@ -0,0 +1,45 @@
+package Encode::EBCDIC;
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::EBCDIC - EBCDIC Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $posix_bc  = encode("posix-bc", $utf8); # loads Encode::EBCDIC implicitly
+    $utf8 = decode("", $posix_bc);          # ditto
+
+=head1 ABSTRACT
+
+This module implements various EBCDIC-Based encodings.  Encodings
+supported are as follows.   
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  cp37  
+  cp500  
+  cp875  
+  cp1026  
+  cp1047  
+  posix-bc
+
+=head1 DESCRIPTION
+
+To find how to use this module in detail, see L<Encode>.
+
+=head1 SEE ALSO
+
+L<Encode>, L<perlebcdic>
+
+=cut
--- a/database/perl/lib/Encode/Encoder.pm
+++ b/database/perl/lib/Encode/Encoder.pm
@@ -0,0 +1,253 @@
+#
+# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
+#
+package Encode::Encoder;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+require Exporter;
+our @ISA       = qw(Exporter);
+our @EXPORT_OK = qw ( encoder );
+
+our $AUTOLOAD;
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
+use Encode qw(encode decode find_encoding from_to);
+use Carp;
+
+sub new {
+    my ( $class, $data, $encname ) = @_;
+    unless ($encname) {
+        $encname = Encode::is_utf8($data) ? 'utf8' : '';
+    }
+    else {
+        my $obj = find_encoding($encname)
+          or croak __PACKAGE__, ": unknown encoding: $encname";
+        $encname = $obj->name;
+    }
+    my $self = {
+        data     => $data,
+        encoding => $encname,
+    };
+    bless $self => $class;
+}
+
+sub encoder { __PACKAGE__->new(@_) }
+
+sub data {
+    my ( $self, $data ) = @_;
+    if ( defined $data ) {
+        $self->{data} = $data;
+        return $data;
+    }
+    else {
+        return $self->{data};
+    }
+}
+
+sub encoding {
+    my ( $self, $encname ) = @_;
+    if ($encname) {
+        my $obj = find_encoding($encname)
+          or confess __PACKAGE__, ": unknown encoding: $encname";
+        $self->{encoding} = $obj->name;
+        return $self;
+    }
+    else {
+        return $self->{encoding};
+    }
+}
+
+sub bytes {
+    my ( $self, $encname ) = @_;
+    $encname ||= $self->{encoding};
+    my $obj = find_encoding($encname)
+      or confess __PACKAGE__, ": unknown encoding: $encname";
+    $self->{data} = $obj->decode( $self->{data}, 1 );
+    $self->{encoding} = '';
+    return $self;
+}
+
+sub DESTROY {    # defined so it won't autoload.
+    DEBUG and warn shift;
+}
+
+sub AUTOLOAD {
+    my $self = shift;
+    my $type = ref($self)
+      or confess "$self is not an object";
+    my $myname = $AUTOLOAD;
+    $myname =~ s/.*://;    # strip fully-qualified portion
+    my $obj = find_encoding($myname)
+      or confess __PACKAGE__, ": unknown encoding: $myname";
+    DEBUG and warn $self->{encoding}, " => ", $obj->name;
+    if ( $self->{encoding} ) {
+        from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
+    }
+    else {
+        $self->{data} = $obj->encode( $self->{data}, 1 );
+    }
+    $self->{encoding} = $obj->name;
+    return $self;
+}
+
+use overload
+  q("") => sub { $_[0]->{data} },
+  q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
+  fallback => 1,
+  ;
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Encoder -- Object Oriented Encoder
+
+=head1 SYNOPSIS
+
+  use Encode::Encoder;
+  # Encode::encode("ISO-8859-1", $data); 
+  Encode::Encoder->new($data)->iso_8859_1; # OOP way
+  # shortcut
+  use Encode::Encoder qw(encoder);
+  encoder($data)->iso_8859_1;
+  # you can stack them!
+  encoder($data)->iso_8859_1->base64;  # provided base64() is defined
+  # you can use it as a decoder as well
+  encoder($base64)->bytes('base64')->latin1;
+  # stringified
+  print encoder($data)->utf8->latin1;  # prints the string in latin1
+  # numified
+  encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
+
+=head1 ABSTRACT
+
+B<Encode::Encoder> allows you to use Encode in an object-oriented
+style.  This is not only more intuitive than a functional approach,
+but also handier when you want to stack encodings.  Suppose you want
+your UTF-8 string converted to Latin1 then Base64: you can simply say
+
+  my $base64 = encoder($utf8)->latin1->base64;
+
+instead of
+
+  my $latin1 = encode("latin1", $utf8);
+  my $base64 = encode_base64($utf8);
+
+or the lazier and more convoluted
+
+  my $base64 = encode_base64(encode("latin1", $utf8));
+
+=head1 Description
+
+Here is how to use this module.
+
+=over 4
+
+=item *
+
+There are at least two instance variables stored in a hash reference,
+{data} and {encoding}.
+
+=item *
+
+When there is no method, it takes the method name as the name of the
+encoding and encodes the instance I<data> with I<encoding>.  If successful,
+the instance I<encoding> is set accordingly.
+
+=item *
+
+You can retrieve the result via -E<gt>data but usually you don't have to 
+because the stringify operator ("") is overridden to do exactly that.
+
+=back
+
+=head2 Predefined Methods
+
+This module predefines the methods below:
+
+=over 4
+
+=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
+
+returns an encoder object.  Its data is initialized with $data if
+present, and its encoding is set to $encoding if present.
+
+When $encoding is omitted, it defaults to utf8 if $data is already in
+utf8 or "" (empty string) otherwise.
+
+=item encoder()
+
+is an alias of Encode::Encoder-E<gt>new().  This one is exported on demand.
+
+=item $e-E<gt>data([$data])
+
+When $data is present, sets the instance data to $data and returns the
+object itself.  Otherwise, the current instance data is returned.
+
+=item $e-E<gt>encoding([$encoding])
+
+When $encoding is present, sets the instance encoding to $encoding and
+returns the object itself.  Otherwise, the current instance encoding is
+returned.
+
+=item $e-E<gt>bytes([$encoding])
+
+decodes instance data from $encoding, or the instance encoding if
+omitted.  If the conversion is successful, the instance encoding
+will be set to "".
+
+The name I<bytes> was deliberately picked to avoid namespace tainting
+-- this module may be used as a base class so method names that appear
+in Encode::Encoding are avoided.
+
+=back
+
+=head2 Example: base64 transcoder
+
+This module is designed to work with L<Encode::Encoding>.
+To make the Base64 transcoder example above really work, you could
+write a module like this:
+
+  package Encode::Base64;
+  use parent 'Encode::Encoding';
+  __PACKAGE__->Define('base64');
+  use MIME::Base64;
+  sub encode{ 
+      my ($obj, $data) = @_; 
+      return encode_base64($data);
+  }
+  sub decode{
+      my ($obj, $data) = @_; 
+      return decode_base64($data);
+  }
+  1;
+  __END__
+
+And your caller module would be something like this:
+
+  use Encode::Encoder;
+  use Encode::Base64;
+
+  # now you can really do the following
+
+  encoder($data)->iso_8859_1->base64;
+  encoder($base64)->bytes('base64')->latin1;
+
+=head2 Operator Overloading
+
+This module overloads two operators, stringify ("") and numify (0+).
+
+Stringify dumps the data inside the object.
+
+Numify returns the number of bytes in the instance data.
+
+They come in handy when you want to print or find the size of data.
+
+=head1 SEE ALSO
+
+L<Encode>,
+L<Encode::Encoding>
+
+=cut
--- a/database/perl/lib/Encode/Encoding.pm
+++ b/database/perl/lib/Encode/Encoding.pm
@@ -0,0 +1,356 @@
+package Encode::Encoding;
+
+# Base class for classes which implement encodings
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+our @CARP_NOT = qw(Encode Encode::Encoder);
+
+use Carp ();
+use Encode ();
+use Encode::MIME::Name;
+
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
+
+sub Define {
+    my $obj       = shift;
+    my $canonical = shift;
+    $obj = bless { Name => $canonical }, $obj unless ref $obj;
+
+    # warn "$canonical => $obj\n";
+    Encode::define_encoding( $obj, $canonical, @_ );
+}
+
+sub name { return shift->{'Name'} }
+
+sub mime_name {
+    return Encode::MIME::Name::get_mime_name(shift->name);
+}
+
+sub renew {
+    my $self = shift;
+    my $clone = bless {%$self} => ref($self);
+    $clone->{renewed}++;    # so the caller can see it
+    DEBUG and warn $clone->{renewed};
+    return $clone;
+}
+
+sub renewed { return $_[0]->{renewed} || 0 }
+
+*new_sequence = \&renew;
+
+sub needs_lines { 0 }
+
+sub perlio_ok {
+    return eval { require PerlIO::encoding } ? 1 : 0;
+}
+
+# (Temporary|legacy) methods
+
+sub toUnicode   { shift->decode(@_) }
+sub fromUnicode { shift->encode(@_) }
+
+#
+# Needs to be overloaded or just croak
+#
+
+sub encode {
+    my $obj = shift;
+    my $class = ref($obj) ? ref($obj) : $obj;
+    Carp::croak( $class . "->encode() not defined!" );
+}
+
+sub decode {
+    my $obj = shift;
+    my $class = ref($obj) ? ref($obj) : $obj;
+    Carp::croak( $class . "->encode() not defined!" );
+}
+
+sub DESTROY { }
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Encoding - Encode Implementation Base Class
+
+=head1 SYNOPSIS
+
+  package Encode::MyEncoding;
+  use parent qw(Encode::Encoding);
+
+  __PACKAGE__->Define(qw(myCanonical myAlias));
+
+=head1 DESCRIPTION
+
+As mentioned in L<Encode>, encodings are (in the current
+implementation at least) defined as objects. The mapping of encoding
+name to object is via the C<%Encode::Encoding> hash.  Though you can
+directly manipulate this hash, it is strongly encouraged to use this
+base class module and add encode() and decode() methods.
+
+=head2 Methods you should implement
+
+You are strongly encouraged to implement methods below, at least
+either encode() or decode().
+
+=over 4
+
+=item -E<gt>encode($string [,$check])
+
+MUST return the octet sequence representing I<$string>. 
+
+=over 2
+
+=item *
+
+If I<$check> is true, it SHOULD modify I<$string> in place to remove
+the converted part (i.e.  the whole string unless there is an error).
+If perlio_ok() is true, SHOULD becomes MUST.
+
+=item *
+
+If an error occurs, it SHOULD return the octet sequence for the
+fragment of string that has been converted and modify $string in-place
+to remove the converted part leaving it starting with the problem
+fragment.  If perlio_ok() is true, SHOULD becomes MUST.
+
+=item *
+
+If I<$check> is false then C<encode> MUST  make a "best effort" to
+convert the string - for example, by using a replacement character.
+
+=back
+
+=item -E<gt>decode($octets [,$check])
+
+MUST return the string that I<$octets> represents.
+
+=over 2
+
+=item *
+
+If I<$check> is true, it SHOULD modify I<$octets> in place to remove
+the converted part (i.e.  the whole sequence unless there is an
+error).  If perlio_ok() is true, SHOULD becomes MUST.
+
+=item *
+
+If an error occurs, it SHOULD return the fragment of string that has
+been converted and modify $octets in-place to remove the converted
+part leaving it starting with the problem fragment.  If perlio_ok() is
+true, SHOULD becomes MUST.
+
+=item *
+
+If I<$check> is false then C<decode> should make a "best effort" to
+convert the string - for example by using Unicode's "\x{FFFD}" as a
+replacement character.
+
+=back
+
+=back
+
+If you want your encoding to work with L<encoding> pragma, you should
+also implement the method below.
+
+=over 4
+
+=item -E<gt>cat_decode($destination, $octets, $offset, $terminator [,$check])
+
+MUST decode I<$octets> with I<$offset> and concatenate it to I<$destination>.
+Decoding will terminate when $terminator (a string) appears in output.
+I<$offset> will be modified to the last $octets position at end of decode.
+Returns true if $terminator appears output, else returns false.
+
+=back
+
+=head2 Other methods defined in Encode::Encodings
+
+You do not have to override methods shown below unless you have to.
+
+=over 4
+
+=item -E<gt>name
+
+Predefined As:
+
+  sub name  { return shift->{'Name'} }
+
+MUST return the string representing the canonical name of the encoding.
+
+=item -E<gt>mime_name
+
+Predefined As:
+
+  sub mime_name{
+    return Encode::MIME::Name::get_mime_name(shift->name);
+  }
+
+MUST return the string representing the IANA charset name of the encoding.
+
+=item -E<gt>renew
+
+Predefined As:
+
+  sub renew {
+    my $self = shift;
+    my $clone = bless { %$self } => ref($self);
+    $clone->{renewed}++;
+    return $clone;
+  }
+
+This method reconstructs the encoding object if necessary.  If you need
+to store the state during encoding, this is where you clone your object.
+
+PerlIO ALWAYS calls this method to make sure it has its own private
+encoding object.
+
+=item -E<gt>renewed
+
+Predefined As:
+
+  sub renewed { $_[0]->{renewed} || 0 }
+
+Tells whether the object is renewed (and how many times).  Some
+modules emit C<Use of uninitialized value in null operation> warning
+unless the value is numeric so return 0 for false.
+
+=item -E<gt>perlio_ok()
+
+Predefined As:
+
+  sub perlio_ok { 
+    return eval { require PerlIO::encoding } ? 1 : 0;
+  }
+
+If your encoding does not support PerlIO for some reasons, just;
+
+ sub perlio_ok { 0 }
+
+=item -E<gt>needs_lines()
+
+Predefined As:
+
+  sub needs_lines { 0 };
+
+If your encoding can work with PerlIO but needs line buffering, you
+MUST define this method so it returns true.  7bit ISO-2022 encodings
+are one example that needs this.  When this method is missing, false
+is assumed.
+
+=back
+
+=head2 Example: Encode::ROT13
+
+  package Encode::ROT13;
+  use strict;
+  use parent qw(Encode::Encoding);
+
+  __PACKAGE__->Define('rot13');
+
+  sub encode($$;$){
+      my ($obj, $str, $chk) = @_;
+      $str =~ tr/A-Za-z/N-ZA-Mn-za-m/;
+      $_[1] = '' if $chk; # this is what in-place edit means
+      return $str;
+  }
+
+  # Jr pna or ynml yvxr guvf;
+  *decode = \&encode;
+
+  1;
+
+=head1 Why the heck Encode API is different?
+
+It should be noted that the I<$check> behaviour is different from the
+outer public API. The logic is that the "unchecked" case is useful
+when the encoding is part of a stream which may be reporting errors
+(e.g. STDERR).  In such cases, it is desirable to get everything
+through somehow without causing additional errors which obscure the
+original one. Also, the encoding is best placed to know what the
+correct replacement character is, so if that is the desired behaviour
+then letting low level code do it is the most efficient.
+
+By contrast, if I<$check> is true, the scheme above allows the
+encoding to do as much as it can and tell the layer above how much
+that was. What is lacking at present is a mechanism to report what
+went wrong. The most likely interface will be an additional method
+call to the object, or perhaps (to avoid forcing per-stream objects
+on otherwise stateless encodings) an additional parameter.
+
+It is also highly desirable that encoding classes inherit from
+C<Encode::Encoding> as a base class. This allows that class to define
+additional behaviour for all encoding objects.
+
+  package Encode::MyEncoding;
+  use parent qw(Encode::Encoding);
+
+  __PACKAGE__->Define(qw(myCanonical myAlias));
+
+to create an object with C<< bless {Name => ...}, $class >>, and call
+define_encoding.  They inherit their C<name> method from
+C<Encode::Encoding>.
+
+=head2 Compiled Encodings
+
+For the sake of speed and efficiency, most of the encodings are now
+supported via a I<compiled form>: XS modules generated from UCM
+files.   Encode provides the enc2xs tool to achieve that.  Please see
+L<enc2xs> for more details.
+
+=head1 SEE ALSO
+
+L<perlmod>, L<enc2xs>
+
+=begin future
+
+=over 4
+
+=item Scheme 1
+
+The fixup routine gets passed the remaining fragment of string being
+processed.  It modifies it in place to remove bytes/characters it can
+understand and returns a string used to represent them.  For example:
+
+ sub fixup {
+   my $ch = substr($_[0],0,1,'');
+   return sprintf("\x{%02X}",ord($ch);
+ }
+
+This scheme is close to how the underlying C code for Encode works,
+but gives the fixup routine very little context.
+
+=item Scheme 2
+
+The fixup routine gets passed the original string, an index into
+it of the problem area, and the output string so far.  It appends
+what it wants to the output string and returns a new index into the
+original string.  For example:
+
+ sub fixup {
+   # my ($s,$i,$d) = @_;
+   my $ch = substr($_[0],$_[1],1);
+   $_[2] .= sprintf("\x{%02X}",ord($ch);
+   return $_[1]+1;
+ }
+
+This scheme gives maximal control to the fixup routine but is more
+complicated to code, and may require that the internals of Encode be tweaked to
+keep the original string intact.
+
+=item Other Schemes
+
+Hybrids of the above.
+
+Multiple return values rather than in-place modifications.
+
+Index into the string could be C<pos($str)> allowing C<s/\G...//>.
+
+=back
+
+=end future
+
+=cut
--- a/database/perl/lib/Encode/GSM0338.pm
+++ b/database/perl/lib/Encode/GSM0338.pm
@@ -0,0 +1,289 @@
+#
+# $Id: GSM0338.pm,v 2.9 2020/12/02 01:28:17 dankogai Exp dankogai $
+#
+package Encode::GSM0338;
+
+use strict;
+use warnings;
+use Carp;
+
+use vars qw($VERSION);
+$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Encode qw(:fallbacks);
+
+use parent qw(Encode::Encoding);
+__PACKAGE__->Define('gsm0338');
+
+use utf8;
+
+# Mapping table according to 3GPP TS 23.038 version 16.0.0 Release 16 and ETSI TS 123 038 V16.0.0 (2020-07)
+# https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/16.00.00_60/ts_123038v160000p.pdf (page 20 and 22)
+our %UNI2GSM = (
+    "\x{000A}" => "\x0A",        # LINE FEED
+    "\x{000C}" => "\x1B\x0A",    # FORM FEED
+    "\x{000D}" => "\x0D",        # CARRIAGE RETURN
+    "\x{0020}" => "\x20",        # SPACE
+    "\x{0021}" => "\x21",        # EXCLAMATION MARK
+    "\x{0022}" => "\x22",        # QUOTATION MARK
+    "\x{0023}" => "\x23",        # NUMBER SIGN
+    "\x{0024}" => "\x02",        # DOLLAR SIGN
+    "\x{0025}" => "\x25",        # PERCENT SIGN
+    "\x{0026}" => "\x26",        # AMPERSAND
+    "\x{0027}" => "\x27",        # APOSTROPHE
+    "\x{0028}" => "\x28",        # LEFT PARENTHESIS
+    "\x{0029}" => "\x29",        # RIGHT PARENTHESIS
+    "\x{002A}" => "\x2A",        # ASTERISK
+    "\x{002B}" => "\x2B",        # PLUS SIGN
+    "\x{002C}" => "\x2C",        # COMMA
+    "\x{002D}" => "\x2D",        # HYPHEN-MINUS
+    "\x{002E}" => "\x2E",        # FULL STOP
+    "\x{002F}" => "\x2F",        # SOLIDUS
+    "\x{0030}" => "\x30",        # DIGIT ZERO
+    "\x{0031}" => "\x31",        # DIGIT ONE
+    "\x{0032}" => "\x32",        # DIGIT TWO
+    "\x{0033}" => "\x33",        # DIGIT THREE
+    "\x{0034}" => "\x34",        # DIGIT FOUR
+    "\x{0035}" => "\x35",        # DIGIT FIVE
+    "\x{0036}" => "\x36",        # DIGIT SIX
+    "\x{0037}" => "\x37",        # DIGIT SEVEN
+    "\x{0038}" => "\x38",        # DIGIT EIGHT
+    "\x{0039}" => "\x39",        # DIGIT NINE
+    "\x{003A}" => "\x3A",        # COLON
+    "\x{003B}" => "\x3B",        # SEMICOLON
+    "\x{003C}" => "\x3C",        # LESS-THAN SIGN
+    "\x{003D}" => "\x3D",        # EQUALS SIGN
+    "\x{003E}" => "\x3E",        # GREATER-THAN SIGN
+    "\x{003F}" => "\x3F",        # QUESTION MARK
+    "\x{0040}" => "\x00",        # COMMERCIAL AT
+    "\x{0041}" => "\x41",        # LATIN CAPITAL LETTER A
+    "\x{0042}" => "\x42",        # LATIN CAPITAL LETTER B
+    "\x{0043}" => "\x43",        # LATIN CAPITAL LETTER C
+    "\x{0044}" => "\x44",        # LATIN CAPITAL LETTER D
+    "\x{0045}" => "\x45",        # LATIN CAPITAL LETTER E
+    "\x{0046}" => "\x46",        # LATIN CAPITAL LETTER F
+    "\x{0047}" => "\x47",        # LATIN CAPITAL LETTER G
+    "\x{0048}" => "\x48",        # LATIN CAPITAL LETTER H
+    "\x{0049}" => "\x49",        # LATIN CAPITAL LETTER I
+    "\x{004A}" => "\x4A",        # LATIN CAPITAL LETTER J
+    "\x{004B}" => "\x4B",        # LATIN CAPITAL LETTER K
+    "\x{004C}" => "\x4C",        # LATIN CAPITAL LETTER L
+    "\x{004D}" => "\x4D",        # LATIN CAPITAL LETTER M
+    "\x{004E}" => "\x4E",        # LATIN CAPITAL LETTER N
+    "\x{004F}" => "\x4F",        # LATIN CAPITAL LETTER O
+    "\x{0050}" => "\x50",        # LATIN CAPITAL LETTER P
+    "\x{0051}" => "\x51",        # LATIN CAPITAL LETTER Q
+    "\x{0052}" => "\x52",        # LATIN CAPITAL LETTER R
+    "\x{0053}" => "\x53",        # LATIN CAPITAL LETTER S
+    "\x{0054}" => "\x54",        # LATIN CAPITAL LETTER T
+    "\x{0055}" => "\x55",        # LATIN CAPITAL LETTER U
+    "\x{0056}" => "\x56",        # LATIN CAPITAL LETTER V
+    "\x{0057}" => "\x57",        # LATIN CAPITAL LETTER W
+    "\x{0058}" => "\x58",        # LATIN CAPITAL LETTER X
+    "\x{0059}" => "\x59",        # LATIN CAPITAL LETTER Y
+    "\x{005A}" => "\x5A",        # LATIN CAPITAL LETTER Z
+    "\x{005B}" => "\x1B\x3C",    # LEFT SQUARE BRACKET
+    "\x{005C}" => "\x1B\x2F",    # REVERSE SOLIDUS
+    "\x{005D}" => "\x1B\x3E",    # RIGHT SQUARE BRACKET
+    "\x{005E}" => "\x1B\x14",    # CIRCUMFLEX ACCENT
+    "\x{005F}" => "\x11",        # LOW LINE
+    "\x{0061}" => "\x61",        # LATIN SMALL LETTER A
+    "\x{0062}" => "\x62",        # LATIN SMALL LETTER B
+    "\x{0063}" => "\x63",        # LATIN SMALL LETTER C
+    "\x{0064}" => "\x64",        # LATIN SMALL LETTER D
+    "\x{0065}" => "\x65",        # LATIN SMALL LETTER E
+    "\x{0066}" => "\x66",        # LATIN SMALL LETTER F
+    "\x{0067}" => "\x67",        # LATIN SMALL LETTER G
+    "\x{0068}" => "\x68",        # LATIN SMALL LETTER H
+    "\x{0069}" => "\x69",        # LATIN SMALL LETTER I
+    "\x{006A}" => "\x6A",        # LATIN SMALL LETTER J
+    "\x{006B}" => "\x6B",        # LATIN SMALL LETTER K
+    "\x{006C}" => "\x6C",        # LATIN SMALL LETTER L
+    "\x{006D}" => "\x6D",        # LATIN SMALL LETTER M
+    "\x{006E}" => "\x6E",        # LATIN SMALL LETTER N
+    "\x{006F}" => "\x6F",        # LATIN SMALL LETTER O
+    "\x{0070}" => "\x70",        # LATIN SMALL LETTER P
+    "\x{0071}" => "\x71",        # LATIN SMALL LETTER Q
+    "\x{0072}" => "\x72",        # LATIN SMALL LETTER R
+    "\x{0073}" => "\x73",        # LATIN SMALL LETTER S
+    "\x{0074}" => "\x74",        # LATIN SMALL LETTER T
+    "\x{0075}" => "\x75",        # LATIN SMALL LETTER U
+    "\x{0076}" => "\x76",        # LATIN SMALL LETTER V
+    "\x{0077}" => "\x77",        # LATIN SMALL LETTER W
+    "\x{0078}" => "\x78",        # LATIN SMALL LETTER X
+    "\x{0079}" => "\x79",        # LATIN SMALL LETTER Y
+    "\x{007A}" => "\x7A",        # LATIN SMALL LETTER Z
+    "\x{007B}" => "\x1B\x28",    # LEFT CURLY BRACKET
+    "\x{007C}" => "\x1B\x40",    # VERTICAL LINE
+    "\x{007D}" => "\x1B\x29",    # RIGHT CURLY BRACKET
+    "\x{007E}" => "\x1B\x3D",    # TILDE
+    "\x{00A1}" => "\x40",        # INVERTED EXCLAMATION MARK
+    "\x{00A3}" => "\x01",        # POUND SIGN
+    "\x{00A4}" => "\x24",        # CURRENCY SIGN
+    "\x{00A5}" => "\x03",        # YEN SIGN
+    "\x{00A7}" => "\x5F",        # SECTION SIGN
+    "\x{00BF}" => "\x60",        # INVERTED QUESTION MARK
+    "\x{00C4}" => "\x5B",        # LATIN CAPITAL LETTER A WITH DIAERESIS
+    "\x{00C5}" => "\x0E",        # LATIN CAPITAL LETTER A WITH RING ABOVE
+    "\x{00C6}" => "\x1C",        # LATIN CAPITAL LETTER AE
+    "\x{00C7}" => "\x09",        # LATIN CAPITAL LETTER C WITH CEDILLA
+    "\x{00C9}" => "\x1F",        # LATIN CAPITAL LETTER E WITH ACUTE
+    "\x{00D1}" => "\x5D",        # LATIN CAPITAL LETTER N WITH TILDE
+    "\x{00D6}" => "\x5C",        # LATIN CAPITAL LETTER O WITH DIAERESIS
+    "\x{00D8}" => "\x0B",        # LATIN CAPITAL LETTER O WITH STROKE
+    "\x{00DC}" => "\x5E",        # LATIN CAPITAL LETTER U WITH DIAERESIS
+    "\x{00DF}" => "\x1E",        # LATIN SMALL LETTER SHARP S
+    "\x{00E0}" => "\x7F",        # LATIN SMALL LETTER A WITH GRAVE
+    "\x{00E4}" => "\x7B",        # LATIN SMALL LETTER A WITH DIAERESIS
+    "\x{00E5}" => "\x0F",        # LATIN SMALL LETTER A WITH RING ABOVE
+    "\x{00E6}" => "\x1D",        # LATIN SMALL LETTER AE
+    "\x{00E8}" => "\x04",        # LATIN SMALL LETTER E WITH GRAVE
+    "\x{00E9}" => "\x05",        # LATIN SMALL LETTER E WITH ACUTE
+    "\x{00EC}" => "\x07",        # LATIN SMALL LETTER I WITH GRAVE
+    "\x{00F1}" => "\x7D",        # LATIN SMALL LETTER N WITH TILDE
+    "\x{00F2}" => "\x08",        # LATIN SMALL LETTER O WITH GRAVE
+    "\x{00F6}" => "\x7C",        # LATIN SMALL LETTER O WITH DIAERESIS
+    "\x{00F8}" => "\x0C",        # LATIN SMALL LETTER O WITH STROKE
+    "\x{00F9}" => "\x06",        # LATIN SMALL LETTER U WITH GRAVE
+    "\x{00FC}" => "\x7E",        # LATIN SMALL LETTER U WITH DIAERESIS
+    "\x{0393}" => "\x13",        # GREEK CAPITAL LETTER GAMMA
+    "\x{0394}" => "\x10",        # GREEK CAPITAL LETTER DELTA
+    "\x{0398}" => "\x19",        # GREEK CAPITAL LETTER THETA
+    "\x{039B}" => "\x14",        # GREEK CAPITAL LETTER LAMDA
+    "\x{039E}" => "\x1A",        # GREEK CAPITAL LETTER XI
+    "\x{03A0}" => "\x16",        # GREEK CAPITAL LETTER PI
+    "\x{03A3}" => "\x18",        # GREEK CAPITAL LETTER SIGMA
+    "\x{03A6}" => "\x12",        # GREEK CAPITAL LETTER PHI
+    "\x{03A8}" => "\x17",        # GREEK CAPITAL LETTER PSI
+    "\x{03A9}" => "\x15",        # GREEK CAPITAL LETTER OMEGA
+    "\x{20AC}" => "\x1B\x65",    # EURO SIGN
+);
+our %GSM2UNI = reverse %UNI2GSM;
+our $ESC    = "\x1b";
+
+sub decode ($$;$) {
+    my ( $obj, $bytes, $chk ) = @_;
+    return undef unless defined $bytes;
+    my $str = substr($bytes, 0, 0); # to propagate taintedness;
+    while ( length $bytes ) {
+        my $seq = '';
+        my $c;
+        do {
+            $c = substr( $bytes, 0, 1, '' );
+            $seq .= $c;
+        } while ( length $bytes and $c eq $ESC );
+        my $u =
+            exists $GSM2UNI{$seq}
+            ? $GSM2UNI{$seq}
+            : ($chk && ref $chk eq 'CODE')
+            ? $chk->( unpack 'C*', $seq )
+            : "\x{FFFD}";
+        if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) {
+            if ( substr($seq, 0, 1) eq $ESC and ($chk & Encode::STOP_AT_PARTIAL) ) {
+                $bytes .= $seq;
+                last;
+            }
+            croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::DIE_ON_ERR;
+            carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::WARN_ON_ERR;
+            if ($chk & Encode::RETURN_ON_ERR) {
+                $bytes .= $seq;
+                last;
+            }
+        }
+        $str .= $u;
+    }
+    $_[1] = $bytes if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    return $str;
+}
+
+sub encode($$;$) {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+    my $bytes = substr($str, 0, 0); # to propagate taintedness
+    while ( length $str ) {
+        my $u = substr( $str, 0, 1, '' );
+        my $c;
+        my $seq =
+            exists $UNI2GSM{$u}
+            ? $UNI2GSM{$u}
+            : ($chk && ref $chk eq 'CODE')
+            ? $chk->( ord($u) )
+            : $UNI2GSM{'?'};
+        if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) {
+            croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::DIE_ON_ERR;
+            carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::WARN_ON_ERR;
+            if ($chk & Encode::RETURN_ON_ERR) {
+                $str .= $u;
+                last;
+            }
+        }
+        $bytes .= $seq;
+    }
+    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    return $bytes;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::GSM0338 -- ETSI GSM 03.38 Encoding
+
+=head1 SYNOPSIS
+
+  use Encode qw/encode decode/;
+  $gsm0338 = encode("gsm0338", $unicode); # loads Encode::GSM0338 implicitly
+  $unicode = decode("gsm0338", $gsm0338); # ditto
+
+=head1 DESCRIPTION
+
+GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII,
+control character ranges and other parts are mapped very differently,
+mainly to store Greek characters.  There are also escape sequences
+(starting with 0x1B) to cover e.g. the Euro sign.
+
+This was once handled by L<Encode::Bytes> but because of all those
+unusual specifications, Encode 2.20 has relocated the support to
+this module.
+
+This module implements only I<GSM 7 bit Default Alphabet> and
+I<GSM 7 bit default alphabet extension table> according to standard
+3GPP TS 23.038 version 16. Therefore I<National Language Single Shift>
+and I<National Language Locking Shift> are not implemented nor supported.
+
+=head2 Septets
+
+This modules operates with octets (like any other Encode module) and not
+with packed septets (unlike other GSM standards). Therefore for processing
+binary SMS or parts of GSM TPDU payload (3GPP TS 23.040) it is needed to do
+conversion between octets and packed septets. For this purpose perl's C<pack>
+and C<unpack> functions may be useful:
+
+  $bytes = substr(pack('(b*)*', unpack '(A7)*', unpack 'b*', $septets), 0, $num_of_septets);
+  $unicode = decode('GSM0338', $bytes);
+
+  $bytes = encode('GSM0338', $unicode);
+  $septets = pack 'b*', join '', map { substr $_, 0, 7 } unpack '(A8)*', unpack 'b*', $bytes;
+  $num_of_septets = length $bytes;
+
+Please note that for correct decoding of packed septets it is required to
+know number of septets packed in binary buffer as binary buffer is always
+padded with zero bits and 7 zero bits represents character C<@>. Number
+of septets is also stored in TPDU payload when dealing with 3GPP TS 23.040.
+
+=head1 BUGS
+
+Encode::GSM0338 2.7 and older versions (part of Encode 3.06) incorrectly
+handled zero bytes (character C<@>). This was fixed in Encode::GSM0338
+version 2.8 (part of Encode 3.07).
+
+=head1 SEE ALSO
+
+L<3GPP TS 23.038|https://www.3gpp.org/dynareport/23038.htm>
+
+L<ETSI TS 123 038 V16.0.0 (2020-07)|https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/16.00.00_60/ts_123038v160000p.pdf>
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/Guess.pm
+++ b/database/perl/lib/Encode/Guess.pm
@@ -0,0 +1,356 @@
+package Encode::Guess;
+use strict;
+use warnings;
+use Encode qw(:fallbacks find_encoding);
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+my $Canon = 'Guess';
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
+our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
+my $obj = bless {
+    Name     => $Canon,
+    Suspects => {%DEF_SUSPECTS},
+} => __PACKAGE__;
+Encode::define_encoding($obj, $Canon);
+
+use parent qw(Encode::Encoding);
+sub needs_lines { 1 }
+sub perlio_ok   { 0 }
+
+our @EXPORT         = qw(guess_encoding);
+our $NoUTFAutoGuess = 0;
+our $UTF8_BOM       = pack( "C3", 0xef, 0xbb, 0xbf );
+
+sub import {    # Exporter not used so we do it on our own
+    my $callpkg = caller;
+    for my $item (@EXPORT) {
+        no strict 'refs';
+        *{"$callpkg\::$item"} = \&{"$item"};
+    }
+    set_suspects(@_);
+}
+
+sub set_suspects {
+    my $class = shift;
+    my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
+    $self->{Suspects} = {%DEF_SUSPECTS};
+    $self->add_suspects(@_);
+}
+
+sub add_suspects {
+    my $class = shift;
+    my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
+    for my $c (@_) {
+        my $e = find_encoding($c) or die "Unknown encoding: $c";
+        $self->{Suspects}{ $e->name } = $e;
+        DEBUG and warn "Added: ", $e->name;
+    }
+}
+
+sub decode($$;$) {
+    my ( $obj, $octet, $chk ) = @_;
+    my $guessed = guess( $obj, $octet );
+    unless ( ref($guessed) ) {
+        require Carp;
+        Carp::croak($guessed);
+    }
+    my $utf8 = $guessed->decode( $octet, $chk || 0 );
+    $_[1] = $octet if $chk;
+    return $utf8;
+}
+
+sub guess_encoding {
+    guess( $Encode::Encoding{$Canon}, @_ );
+}
+
+sub guess {
+    my $class = shift;
+    my $obj   = ref($class) ? $class : $Encode::Encoding{$Canon};
+    my $octet = shift;
+
+    # sanity check
+    return "Empty string, empty guess" unless defined $octet and length $octet;
+
+    # cheat 0: utf8 flag;
+    if ( Encode::is_utf8($octet) ) {
+        return find_encoding('utf8') unless $NoUTFAutoGuess;
+        Encode::_utf8_off($octet);
+    }
+
+    # cheat 1: BOM
+    use Encode::Unicode;
+    unless ($NoUTFAutoGuess) {
+        my $BOM = pack( 'C3', unpack( "C3", $octet ) );
+        return find_encoding('utf8')
+          if ( defined $BOM and $BOM eq $UTF8_BOM );
+        $BOM = unpack( 'N', $octet );
+        return find_encoding('UTF-32')
+          if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe0000 ) );
+        $BOM = unpack( 'n', $octet );
+        return find_encoding('UTF-16')
+          if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe ) );
+        if ( $octet =~ /\x00/o )
+        {    # if \x00 found, we assume UTF-(16|32)(BE|LE)
+            my $utf;
+            my ( $be, $le ) = ( 0, 0 );
+            if ( $octet =~ /\x00\x00/o ) {    # UTF-32(BE|LE) assumed
+                $utf = "UTF-32";
+                for my $char ( unpack( 'N*', $octet ) ) {
+                    $char & 0x0000ffff and $be++;
+                    $char & 0xffff0000 and $le++;
+                }
+            }
+            else {                            # UTF-16(BE|LE) assumed
+                $utf = "UTF-16";
+                for my $char ( unpack( 'n*', $octet ) ) {
+                    $char & 0x00ff and $be++;
+                    $char & 0xff00 and $le++;
+                }
+            }
+            DEBUG and warn "$utf, be == $be, le == $le";
+            $be == $le
+              and return
+              "Encodings ambiguous between $utf BE and LE ($be, $le)";
+            $utf .= ( $be > $le ) ? 'BE' : 'LE';
+            return find_encoding($utf);
+        }
+    }
+    my %try = %{ $obj->{Suspects} };
+    for my $c (@_) {
+        my $e = find_encoding($c) or die "Unknown encoding: $c";
+        $try{ $e->name } = $e;
+        DEBUG and warn "Added: ", $e->name;
+    }
+    my $nline = 1;
+    for my $line ( split /\r\n?|\n/, $octet ) {
+
+        # cheat 2 -- \e in the string
+        if ( $line =~ /\e/o ) {
+            my @keys = keys %try;
+            delete @try{qw/utf8 ascii/};
+            for my $k (@keys) {
+                ref( $try{$k} ) eq 'Encode::XS' and delete $try{$k};
+            }
+        }
+        my %ok = %try;
+
+        # warn join(",", keys %try);
+        for my $k ( keys %try ) {
+            my $scratch = $line;
+            $try{$k}->decode( $scratch, FB_QUIET );
+            if ( $scratch eq '' ) {
+                DEBUG and warn sprintf( "%4d:%-24s ok\n", $nline, $k );
+            }
+            else {
+                use bytes ();
+                DEBUG
+                  and warn sprintf( "%4d:%-24s not ok; %d bytes left\n",
+                    $nline, $k, bytes::length($scratch) );
+                delete $ok{$k};
+            }
+        }
+        %ok or return "No appropriate encodings found!";
+        if ( scalar( keys(%ok) ) == 1 ) {
+            my ($retval) = values(%ok);
+            return $retval;
+        }
+        %try = %ok;
+        $nline++;
+    }
+    $try{ascii}
+      or return "Encodings too ambiguous: " . join( " or ", keys %try );
+    return $try{ascii};
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Guess -- Guesses encoding from data
+
+=head1 SYNOPSIS
+
+  # if you are sure $data won't contain anything bogus
+
+  use Encode;
+  use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
+  my $utf8 = decode("Guess", $data);
+  my $data = encode("Guess", $utf8);   # this doesn't work!
+
+  # more elaborate way
+  use Encode::Guess;
+  my $enc = guess_encoding($data, qw/euc-jp shiftjis 7bit-jis/);
+  ref($enc) or die "Can't guess: $enc"; # trap error this way
+  $utf8 = $enc->decode($data);
+  # or
+  $utf8 = decode($enc->name, $data)
+
+=head1 ABSTRACT
+
+Encode::Guess enables you to guess in what encoding a given data is
+encoded, or at least tries to.  
+
+=head1 DESCRIPTION
+
+By default, it checks only ascii, utf8 and UTF-16/32 with BOM.
+
+  use Encode::Guess; # ascii/utf8/BOMed UTF
+
+To use it more practically, you have to give the names of encodings to
+check (I<suspects> as follows).  The name of suspects can either be
+canonical names or aliases.
+
+CAVEAT: Unlike UTF-(16|32), BOM in utf8 is NOT AUTOMATICALLY STRIPPED.
+
+ # tries all major Japanese Encodings as well
+  use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
+
+If the C<$Encode::Guess::NoUTFAutoGuess> variable is set to a true
+value, no heuristics will be applied to UTF8/16/32, and the result
+will be limited to the suspects and C<ascii>.
+
+=over 4
+
+=item Encode::Guess->set_suspects
+
+You can also change the internal suspects list via C<set_suspects>
+method. 
+
+  use Encode::Guess;
+  Encode::Guess->set_suspects(qw/euc-jp shiftjis 7bit-jis/);
+
+=item Encode::Guess->add_suspects
+
+Or you can use C<add_suspects> method.  The difference is that
+C<set_suspects> flushes the current suspects list while
+C<add_suspects> adds.
+
+  use Encode::Guess;
+  Encode::Guess->add_suspects(qw/euc-jp shiftjis 7bit-jis/);
+  # now the suspects are euc-jp,shiftjis,7bit-jis, AND
+  # euc-kr,euc-cn, and big5-eten
+  Encode::Guess->add_suspects(qw/euc-kr euc-cn big5-eten/);
+
+=item Encode::decode("Guess" ...)
+
+When you are content with suspects list, you can now
+
+  my $utf8 = Encode::decode("Guess", $data);
+
+=item Encode::Guess->guess($data)
+
+But it will croak if:
+
+=over
+
+=item *
+
+Two or more suspects remain
+
+=item *
+
+No suspects left
+
+=back
+
+So you should instead try this;
+
+  my $decoder = Encode::Guess->guess($data);
+
+On success, $decoder is an object that is documented in
+L<Encode::Encoding>.  So you can now do this;
+
+  my $utf8 = $decoder->decode($data);
+
+On failure, $decoder now contains an error message so the whole thing
+would be as follows;
+
+  my $decoder = Encode::Guess->guess($data);
+  die $decoder unless ref($decoder);
+  my $utf8 = $decoder->decode($data);
+
+=item guess_encoding($data, [, I<list of suspects>])
+
+You can also try C<guess_encoding> function which is exported by
+default.  It takes $data to check and it also takes the list of
+suspects by option.  The optional suspect list is I<not reflected> to
+the internal suspects list.
+
+  my $decoder = guess_encoding($data, qw/euc-jp euc-kr euc-cn/);
+  die $decoder unless ref($decoder);
+  my $utf8 = $decoder->decode($data);
+  # check only ascii, utf8 and UTF-(16|32) with BOM
+  my $decoder = guess_encoding($data);
+
+=back
+
+=head1 CAVEATS
+
+=over 4
+
+=item *
+
+Because of the algorithm used, ISO-8859 series and other single-byte
+encodings do not work well unless either one of ISO-8859 is the only
+one suspect (besides ascii and utf8).
+
+  use Encode::Guess;
+  # perhaps ok
+  my $decoder = guess_encoding($data, 'latin1');
+  # definitely NOT ok
+  my $decoder = guess_encoding($data, qw/latin1 greek/);
+
+The reason is that Encode::Guess guesses encoding by trial and error.
+It first splits $data into lines and tries to decode the line for each
+suspect.  It keeps it going until all but one encoding is eliminated
+out of suspects list.  ISO-8859 series is just too successful for most
+cases (because it fills almost all code points in \x00-\xff).
+
+=item *
+
+Do not mix national standard encodings and the corresponding vendor
+encodings.
+
+  # a very bad idea
+  my $decoder
+     = guess_encoding($data, qw/shiftjis MacJapanese cp932/);
+
+The reason is that vendor encoding is usually a superset of national
+standard so it becomes too ambiguous for most cases.
+
+=item *
+
+On the other hand, mixing various national standard encodings
+automagically works unless $data is too short to allow for guessing.
+
+ # This is ok if $data is long enough
+ my $decoder =  
+  guess_encoding($data, qw/euc-cn
+                           euc-jp shiftjis 7bit-jis
+                           euc-kr
+                           big5-eten/);
+
+=item *
+
+DO NOT PUT TOO MANY SUSPECTS!  Don't you try something like this!
+
+  my $decoder = guess_encoding($data, 
+                               Encode->encodings(":all"));
+
+=back
+
+It is, after all, just a guess.  You should alway be explicit when it
+comes to encodings.  But there are some, especially Japanese,
+environment that guess-coding is a must.  Use this module with care. 
+
+=head1 TO DO
+
+Encode::Guess does not work on EBCDIC platforms.
+
+=head1 SEE ALSO
+
+L<Encode>, L<Encode::Encoding>
+
+=cut
+
--- a/database/perl/lib/Encode/JP.pm
+++ b/database/perl/lib/Encode/JP.pm
@@ -0,0 +1,95 @@
+package Encode::JP;
+BEGIN {
+    if ( ord("A") == 193 ) {
+        die "Encode::JP not supported on EBCDIC\n";
+    }
+}
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+use Encode::JP::JIS7;
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::JP - Japanese Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $euc_jp = encode("euc-jp", $utf8);   # loads Encode::JP implicitly
+    $utf8   = decode("euc-jp", $euc_jp); # ditto
+
+=head1 ABSTRACT
+
+This module implements Japanese charset encodings.  Encodings
+supported are as follows.
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  euc-jp      /\beuc.*jp$/i	EUC (Extended Unix Character)
+              /\bjp.*euc/i   
+          /\bujis$/i
+  shiftjis    /\bshift.*jis$/i	Shift JIS (aka MS Kanji)
+          /\bsjis$/i
+  7bit-jis    /\bjis$/i		7bit JIS
+  iso-2022-jp			ISO-2022-JP                  [RFC1468]
+                = 7bit JIS with all Halfwidth Kana 
+                  converted to Fullwidth
+  iso-2022-jp-1			ISO-2022-JP-1                [RFC2237]
+                                = ISO-2022-JP with JIS X 0212-1990
+                  support.  See below
+  MacJapanese	                Shift JIS + Apple vendor mappings
+  cp932       /\bwindows-31j$/i Code Page 932
+                                = Shift JIS + MS/IBM vendor mappings
+  jis0201-raw                   JIS0201, raw format
+  jis0208-raw                   JIS0201, raw format
+  jis0212-raw                   JIS0201, raw format
+  --------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+To find out how to use this module in detail, see L<Encode>.
+
+=head1 Note on ISO-2022-JP(-1)?
+
+ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
+adds support for JIS X 0212-1990.  That means you can use the same
+code to decode to utf8 but not vice versa.
+
+  $utf8 = decode('iso-2022-jp-1', $stream);
+
+and
+
+  $utf8 = decode('iso-2022-jp',   $stream);
+
+yield the same result but
+
+  $with_0212 = encode('iso-2022-jp-1', $utf8);
+
+is now different from
+
+  $without_0212 = encode('iso-2022-jp', $utf8 );
+
+In the latter case, characters that map to 0212 are first converted
+to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
+'geta mark') then fed to the decoding engine.  U+FFFD is not used,
+in order to preserve text layout as much as possible.
+
+=head1 BUGS
+
+The ASCII region (0x00-0x7f) is preserved for all encodings, even
+though this conflicts with mappings by the Unicode Consortium.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/JP/H2Z.pm
+++ b/database/perl/lib/Encode/JP/H2Z.pm
@@ -0,0 +1,176 @@
+#
+# $Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $
+#
+
+package Encode::JP::H2Z;
+
+use strict;
+use warnings;
+
+our $RCSID = q$Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $;
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Encode::CJKConstants qw(:all);
+
+use vars qw(%_D2Z  $_PAT_D2Z
+  %_Z2D  $_PAT_Z2D
+  %_H2Z  $_PAT_H2Z
+  %_Z2H  $_PAT_Z2H);
+
+%_H2Z = (
+    "\x8e\xa1" => "\xa1\xa3",    #<23><>
+    "\x8e\xa2" => "\xa1\xd6",    #<23><>
+    "\x8e\xa3" => "\xa1\xd7",    #<23><>
+    "\x8e\xa4" => "\xa1\xa2",    #<23><>
+    "\x8e\xa5" => "\xa1\xa6",    #<23><>
+    "\x8e\xa6" => "\xa5\xf2",    #<23><>
+    "\x8e\xa7" => "\xa5\xa1",    #<23><>
+    "\x8e\xa8" => "\xa5\xa3",    #<23><>
+    "\x8e\xa9" => "\xa5\xa5",    #<23><>
+    "\x8e\xaa" => "\xa5\xa7",    #<23><>
+    "\x8e\xab" => "\xa5\xa9",    #<23><>
+    "\x8e\xac" => "\xa5\xe3",    #<23><>
+    "\x8e\xad" => "\xa5\xe5",    #<23><>
+    "\x8e\xae" => "\xa5\xe7",    #<23><>
+    "\x8e\xaf" => "\xa5\xc3",    #<23><>
+    "\x8e\xb0" => "\xa1\xbc",    #<23><>
+    "\x8e\xb1" => "\xa5\xa2",    #<23><>
+    "\x8e\xb2" => "\xa5\xa4",    #<23><>
+    "\x8e\xb3" => "\xa5\xa6",    #<23><>
+    "\x8e\xb4" => "\xa5\xa8",    #<23><>
+    "\x8e\xb5" => "\xa5\xaa",    #<23><>
+    "\x8e\xb6" => "\xa5\xab",    #<23><>
+    "\x8e\xb7" => "\xa5\xad",    #<23><>
+    "\x8e\xb8" => "\xa5\xaf",    #<23><>
+    "\x8e\xb9" => "\xa5\xb1",    #<23><>
+    "\x8e\xba" => "\xa5\xb3",    #<23><>
+    "\x8e\xbb" => "\xa5\xb5",    #<23><>
+    "\x8e\xbc" => "\xa5\xb7",    #<23><>
+    "\x8e\xbd" => "\xa5\xb9",    #<23><>
+    "\x8e\xbe" => "\xa5\xbb",    #<23><>
+    "\x8e\xbf" => "\xa5\xbd",    #<23><>
+    "\x8e\xc0" => "\xa5\xbf",    #<23><>
+    "\x8e\xc1" => "\xa5\xc1",    #<23><>
+    "\x8e\xc2" => "\xa5\xc4",    #<23><>
+    "\x8e\xc3" => "\xa5\xc6",    #<23><>
+    "\x8e\xc4" => "\xa5\xc8",    #<23><>
+    "\x8e\xc5" => "\xa5\xca",    #<23><>
+    "\x8e\xc6" => "\xa5\xcb",    #<23><>
+    "\x8e\xc7" => "\xa5\xcc",    #<23><>
+    "\x8e\xc8" => "\xa5\xcd",    #<23><>
+    "\x8e\xc9" => "\xa5\xce",    #<23><>
+    "\x8e\xca" => "\xa5\xcf",    #<23><>
+    "\x8e\xcb" => "\xa5\xd2",    #<23><>
+    "\x8e\xcc" => "\xa5\xd5",    #<23><>
+    "\x8e\xcd" => "\xa5\xd8",    #<23><>
+    "\x8e\xce" => "\xa5\xdb",    #<23><>
+    "\x8e\xcf" => "\xa5\xde",    #<23><>
+    "\x8e\xd0" => "\xa5\xdf",    #<23><>
+    "\x8e\xd1" => "\xa5\xe0",    #<23><>
+    "\x8e\xd2" => "\xa5\xe1",    #<23><>
+    "\x8e\xd3" => "\xa5\xe2",    #<23><>
+    "\x8e\xd4" => "\xa5\xe4",    #<23><>
+    "\x8e\xd5" => "\xa5\xe6",    #<23><>
+    "\x8e\xd6" => "\xa5\xe8",    #<23><>
+    "\x8e\xd7" => "\xa5\xe9",    #<23><>
+    "\x8e\xd8" => "\xa5\xea",    #<23><>
+    "\x8e\xd9" => "\xa5\xeb",    #<23><>
+    "\x8e\xda" => "\xa5\xec",    #<23><>
+    "\x8e\xdb" => "\xa5\xed",    #<23><>
+    "\x8e\xdc" => "\xa5\xef",    #<23><>
+    "\x8e\xdd" => "\xa5\xf3",    #<23><>
+    "\x8e\xde" => "\xa1\xab",    #<23><>
+    "\x8e\xdf" => "\xa1\xac",    #<23><>
+);
+
+%_D2Z = (
+    "\x8e\xb6\x8e\xde" => "\xa5\xac",    #<23><>
+    "\x8e\xb7\x8e\xde" => "\xa5\xae",    #<23><>
+    "\x8e\xb8\x8e\xde" => "\xa5\xb0",    #<23><>
+    "\x8e\xb9\x8e\xde" => "\xa5\xb2",    #<23><>
+    "\x8e\xba\x8e\xde" => "\xa5\xb4",    #<23><>
+    "\x8e\xbb\x8e\xde" => "\xa5\xb6",    #<23><>
+    "\x8e\xbc\x8e\xde" => "\xa5\xb8",    #<23><>
+    "\x8e\xbd\x8e\xde" => "\xa5\xba",    #<23><>
+    "\x8e\xbe\x8e\xde" => "\xa5\xbc",    #<23><>
+    "\x8e\xbf\x8e\xde" => "\xa5\xbe",    #<23><>
+    "\x8e\xc0\x8e\xde" => "\xa5\xc0",    #<23><>
+    "\x8e\xc1\x8e\xde" => "\xa5\xc2",    #<23><>
+    "\x8e\xc2\x8e\xde" => "\xa5\xc5",    #<23><>
+    "\x8e\xc3\x8e\xde" => "\xa5\xc7",    #<23><>
+    "\x8e\xc4\x8e\xde" => "\xa5\xc9",    #<23><>
+    "\x8e\xca\x8e\xde" => "\xa5\xd0",    #<23><>
+    "\x8e\xcb\x8e\xde" => "\xa5\xd3",    #<23><>
+    "\x8e\xcc\x8e\xde" => "\xa5\xd6",    #<23><>
+    "\x8e\xcd\x8e\xde" => "\xa5\xd9",    #<23><>
+    "\x8e\xce\x8e\xde" => "\xa5\xdc",    #<23><>
+    "\x8e\xca\x8e\xdf" => "\xa5\xd1",    #<23><>
+    "\x8e\xcb\x8e\xdf" => "\xa5\xd4",    #<23><>
+    "\x8e\xcc\x8e\xdf" => "\xa5\xd7",    #<23><>
+    "\x8e\xcd\x8e\xdf" => "\xa5\xda",    #<23><>
+    "\x8e\xce\x8e\xdf" => "\xa5\xdd",    #<23><>
+    "\x8e\xb3\x8e\xde" => "\xa5\xf4",    #<23><>
+);
+
+# init only once;
+
+#$_PAT_D2Z = join("|", keys %_D2Z);
+#$_PAT_H2Z = join("|", keys %_H2Z);
+
+%_Z2H = reverse %_H2Z;
+%_Z2D = reverse %_D2Z;
+
+#$_PAT_Z2H    = join("|", keys %_Z2H);
+#$_PAT_Z2D    = join("|", keys %_Z2D);
+
+sub h2z {
+    no warnings qw(uninitialized);
+    my $r_str          = shift;
+    my ($keep_dakuten) = @_;
+    my $n              = 0;
+    unless ($keep_dakuten) {
+        $n = (
+            $$r_str =~ s(
+               ($RE{EUC_KANA}
+                (?:\x8e[\xde\xdf])?)
+               ){
+          my $str = $1;
+          $_D2Z{$str} || $_H2Z{$str} || 
+              # in case dakuten and handakuten are side-by-side!
+              $_H2Z{substr($str,0,2)} . $_H2Z{substr($str,2,2)};
+          }eogx
+        );
+    }
+    else {
+        $n = (
+            $$r_str =~ s(
+               ($RE{EUC_KANA})
+               ){
+          $_H2Z{$1};
+          }eogx
+        );
+    }
+    $n;
+}
+
+sub z2h {
+    my $r_str = shift;
+    my $n     = (
+        $$r_str =~ s(
+              ($RE{EUC_C}|$RE{EUC_0212}|$RE{EUC_KANA})
+              ){
+         $_Z2D{$1} || $_Z2H{$1} || $1;
+         }eogx
+    );
+    $n;
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Encode::JP::H2Z -- internally used by Encode::JP::2022_JP*
+
+=cut
--- a/database/perl/lib/Encode/JP/JIS7.pm
+++ b/database/perl/lib/Encode/JP/JIS7.pm
@@ -0,0 +1,168 @@
+package Encode::JP::JIS7;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Encode qw(:fallbacks);
+
+for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
+    my $h2z     = ( $name eq '7bit-jis' )    ? 0 : 1;
+    my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
+
+    my $obj = bless {
+        Name    => $name,
+        h2z     => $h2z,
+        jis0212 => $jis0212,
+    } => __PACKAGE__;
+    Encode::define_encoding($obj, $name);
+}
+
+use parent qw(Encode::Encoding);
+
+# we override this to 1 so PerlIO works
+sub needs_lines { 1 }
+
+use Encode::CJKConstants qw(:all);
+
+#
+# decode is identical for all 2022 variants
+#
+
+sub decode($$;$) {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+    my $residue = '';
+    if ($chk) {
+        $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
+    }
+    $residue .= jis_euc( \$str );
+    $_[1] = $residue if $chk;
+    return Encode::decode( 'euc-jp', $str, FB_PERLQQ );
+}
+
+#
+# encode is different
+#
+
+sub encode($$;$) {
+    require Encode::JP::H2Z;
+    my ( $obj, $utf8, $chk ) = @_;
+    return undef unless defined $utf8;
+
+    # empty the input string in the stack so perlio is ok
+    $_[1] = '' if $chk;
+    my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)};
+    my $octet = Encode::encode( 'euc-jp', $utf8, $chk || 0 );
+    $h2z and &Encode::JP::H2Z::h2z( \$octet );
+    euc_jis( \$octet, $jis0212 );
+    return $octet;
+}
+
+#
+# cat_decode
+#
+my $re_scan_jis_g = qr{
+   \G ( ($RE{JIS_0212}) |  $RE{JIS_0208}  |
+        ($RE{ISO_ASC})  | ($RE{JIS_KANA}) | )
+      ([^\e]*)
+}x;
+
+sub cat_decode {    # ($obj, $dst, $src, $pos, $trm, $chk)
+    my ( $obj, undef, undef, $pos, $trm ) = @_;    # currently ignores $chk
+    my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
+    local ${^ENCODING};
+    use bytes;
+    my $opos = pos($$rsrc);
+    pos($$rsrc) = $pos;
+    while ( $$rsrc =~ /$re_scan_jis_g/gc ) {
+        my ( $esc, $esc_0212, $esc_asc, $esc_kana, $chunk ) =
+          ( $1, $2, $3, $4, $5 );
+
+        unless ($chunk) { $esc or last; next; }
+
+        if ( $esc && !$esc_asc ) {
+            $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
+            if ($esc_kana) {
+                $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
+            }
+            elsif ($esc_0212) {
+                $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+            }
+            $chunk = Encode::decode( 'euc-jp', $chunk, 0 );
+        }
+        elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) {
+            $$rdst .= substr( $chunk, 0, $npos + length($trm) );
+            $$rpos += length($esc) + $npos + length($trm);
+            pos($$rsrc) = $opos;
+            return 1;
+        }
+        $$rdst .= $chunk;
+        $$rpos = pos($$rsrc);
+    }
+    $$rpos = pos($$rsrc);
+    pos($$rsrc) = $opos;
+    return '';
+}
+
+# JIS<->EUC
+my $re_scan_jis = qr{
+   (?:($RE{JIS_0212})|$RE{JIS_0208}|($RE{ISO_ASC})|($RE{JIS_KANA}))([^\e]*)
+}x;
+
+sub jis_euc {
+    local ${^ENCODING};
+    my $r_str = shift;
+    $$r_str =~ s($re_scan_jis)
+    {
+    my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
+       ($1, $2, $3, $4);
+    if (!$esc_asc) {
+        $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
+        if ($esc_kana) {
+        $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
+        }
+        elsif ($esc_0212) {
+        $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+        }
+    }
+    $chunk;
+    }geox;
+    my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
+    return $residue;
+}
+
+sub euc_jis {
+    no warnings qw(uninitialized);
+    local ${^ENCODING};
+    my $r_str   = shift;
+    my $jis0212 = shift;
+    $$r_str =~ s{
+    ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
+    }{
+        my $chunk = $1;
+        my $esc =
+        ( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
+            ( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
+            $ESC{JIS_0208};
+        if ($esc eq $ESC{JIS_0212} && !$jis0212){
+        # fallback to '?'
+        $chunk =~ tr/\xA1-\xFE/\x3F/;
+        }else{
+        $chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
+        }
+        $esc . $chunk . $ESC{ASC};
+    }geox;
+    $$r_str =~ s/\Q$ESC{ASC}\E
+        (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
+    $$r_str;
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Encode::JP::JIS7 -- internally used by Encode::JP
+
+=cut
--- a/database/perl/lib/Encode/KR.pm
+++ b/database/perl/lib/Encode/KR.pm
@@ -0,0 +1,69 @@
+package Encode::KR;
+BEGIN {
+    if ( ord("A") == 193 ) {
+        die "Encode::KR not supported on EBCDIC\n";
+    }
+}
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+use Encode::KR::2022_KR;
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::KR - Korean Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $euc_kr = encode("euc-kr", $utf8);   # loads Encode::KR implicitly
+    $utf8   = decode("euc-kr", $euc_kr); # ditto
+
+=head1 DESCRIPTION
+
+This module implements Korean charset encodings.  Encodings supported
+are as follows.
+
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  euc-kr      /\beuc.*kr$/i	EUC (Extended Unix Character)
+          /\bkr.*euc$/i
+  ksc5601-raw			Korean standard code set (as is)
+  cp949	      /(?:x-)?uhc$/i
+              /(?:x-)?windows-949$/i
+              /\bks_c_5601-1987$/i
+                                Code Page 949 (EUC-KR + 8,822 
+                                (additional Hangul syllables)
+  MacKorean			EUC-KR + Apple Vendor Mappings
+  johab       JOHAB             A supplementary encoding defined in 
+                                             Annex 3 of KS X 1001:1998
+  iso-2022-kr                   iso-2022-kr                  [RFC1557]
+  --------------------------------------------------------------------
+
+To find how to use this module in detail, see L<Encode>.
+
+=head1 BUGS
+
+When you see C<charset=ks_c_5601-1987> on mails and web pages, they really
+mean "cp949" encodings.  To fix that, the following aliases are set;
+
+  qr/(?:x-)?uhc$/i         => '"cp949"'
+  qr/(?:x-)?windows-949$/i => '"cp949"'
+  qr/ks_c_5601-1987$/i     => '"cp949"'
+
+The ASCII region (0x00-0x7f) is preserved for all encodings, even
+though this conflicts with mappings by the Unicode Consortium.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/KR/2022_KR.pm
+++ b/database/perl/lib/Encode/KR/2022_KR.pm
@@ -0,0 +1,83 @@
+package Encode::KR::2022_KR;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Encode qw(:fallbacks);
+
+use parent qw(Encode::Encoding);
+__PACKAGE__->Define('iso-2022-kr');
+
+sub needs_lines { 1 }
+
+sub perlio_ok {
+    return 0;    # for the time being
+}
+
+sub decode {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+    my $res     = $str;
+    my $residue = iso_euc( \$res );
+
+    # This is for PerlIO
+    $_[1] = $residue if $chk;
+    return Encode::decode( 'euc-kr', $res, FB_PERLQQ );
+}
+
+sub encode {
+    my ( $obj, $utf8, $chk ) = @_;
+    return undef unless defined $utf8;
+
+    # empty the input string in the stack so perlio is ok
+    $_[1] = '' if $chk;
+    my $octet = Encode::encode( 'euc-kr', $utf8, FB_PERLQQ );
+    euc_iso( \$octet );
+    return $octet;
+}
+
+use Encode::CJKConstants qw(:all);
+
+# ISO<->EUC
+
+sub iso_euc {
+    my $r_str = shift;
+    $$r_str =~ s/$RE{'2022_KR'}//gox;    # remove the designator
+    $$r_str =~ s{                      # replace characters in GL
+     \x0e                              # between SO(\x0e) and SI(\x0f)
+     ([^\x0f]*)                        # with characters in GR
+     \x0f
+        }
+    {
+                        my $out= $1;
+      $out =~ tr/\x21-\x7e/\xa1-\xfe/;
+      $out;
+    }geox;
+    my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
+    return $residue;
+}
+
+sub euc_iso {
+    no warnings qw(uninitialized);
+    my $r_str = shift;
+    substr( $$r_str, 0, 0 ) =
+      $ESC{'2022_KR'};    # put the designator at the beg.
+    $$r_str =~
+      s{                         # move KS X 1001 characters in GR to GL
+        ($RE{EUC_C}+)                     # and enclose them with SO and SI
+        }{
+            my $str = $1;
+            $str =~ tr/\xA1-\xFE/\x21-\x7E/;
+            "\x0e" . $str . "\x0f";
+        }geox;
+    $$r_str;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::KR::2022_KR -- internally used by Encode::KR
+
+=cut
--- a/database/perl/lib/Encode/MIME/Header.pm
+++ b/database/perl/lib/Encode/MIME/Header.pm
@@ -0,0 +1,427 @@
+package Encode::MIME::Header;
+use strict;
+use warnings;
+
+our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Carp ();
+use Encode ();
+use MIME::Base64 ();
+
+my %seed = (
+    decode_b => 1,       # decodes 'B' encoding ?
+    decode_q => 1,       # decodes 'Q' encoding ?
+    encode   => 'B',     # encode with 'B' or 'Q' ?
+    charset  => 'UTF-8', # encode charset
+    bpl      => 75,      # bytes per line
+);
+
+my @objs;
+
+push @objs, bless {
+    %seed,
+    Name     => 'MIME-Header',
+} => __PACKAGE__;
+
+push @objs, bless {
+    %seed,
+    decode_q => 0,
+    Name     => 'MIME-B',
+} => __PACKAGE__;
+
+push @objs, bless {
+    %seed,
+    decode_b => 0,
+    encode   => 'Q',
+    Name     => 'MIME-Q',
+} => __PACKAGE__;
+
+Encode::define_encoding($_, $_->{Name}) foreach @objs;
+
+use parent qw(Encode::Encoding);
+
+sub needs_lines { 1 }
+sub perlio_ok   { 0 }
+
+# RFC 2047 and RFC 2231 grammar
+my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
+my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
+my $re_encoding = qr/[QqBb]/;
+my $re_encoded_text = qr/[^\?]*/;
+my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
+my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
+my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+
+# in strict mode check also for valid base64 characters and also for valid quoted printable codes
+my $re_encoding_strict_b = qr/[Bb]/;
+my $re_encoding_strict_q = qr/[Qq]/;
+my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
+my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
+my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+
+my $re_newline = qr/(?:\r\n|[\r\n])/;
+
+# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
+# except in comments when separator between words and comment round brackets can be omitted
+my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
+my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
+my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
+
+my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
+my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
+
+my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
+my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
+
+our $STRICT_DECODE = 0;
+
+sub decode($$;$) {
+    my ($obj, $str, $chk) = @_;
+    return undef unless defined $str;
+
+    my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
+    my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
+
+    my $stop = 0;
+    my $output = substr($str, 0, 0); # to propagate taintedness
+
+    # decode each line separately, match whole continuous folded line at one call
+    1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
+
+        my $line = $1;
+        my $sep = defined $2 ? $2 : '';
+
+        $stop = 1 unless length($line) or length($sep);
+
+        # NOTE: this code partially could break $chk support
+        # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
+        # fixes breaking inside multi-byte characters
+        1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
+
+        # process sequence of encoded MIME words at once
+        1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
+
+            my $begin = $1 . $2;
+            my $words = $3;
+
+            $begin =~ tr/\r\n//d;
+            $output .= $begin;
+
+            # decode one MIME word
+            1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
+
+                $output .= $1;
+                my $orig = $2;
+                my $charset = $3;
+                my ($mime_enc, $text) = split /\?/, $5;
+
+                $text =~ tr/\r\n//d;
+
+                my $enc = Encode::find_mime_encoding($charset);
+
+                # in non strict mode allow also perl encoding aliases
+                if ( not defined $enc and not $STRICT_DECODE ) {
+                    # make sure that decoded string will be always strict UTF-8
+                    $charset = 'UTF-8' if lc($charset) eq 'utf8';
+                    $enc = Encode::find_encoding($charset);
+                }
+
+                if ( not defined $enc ) {
+                    Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
+                    Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
+                    $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+                    $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+                    $stop ? $orig : '';
+                } else {
+                    if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
+                        my $decoded = _decode_b($enc, $text, $chk);
+                        $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+                        $output .= (defined $decoded ? $decoded : $text) unless $stop;
+                        $stop ? $orig : '';
+                    } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
+                        my $decoded = _decode_q($enc, $text, $chk);
+                        $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+                        $output .= (defined $decoded ? $decoded : $text) unless $stop;
+                        $stop ? $orig : '';
+                    } else {
+                        Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
+                        Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
+                        $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
+                        $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+                        $stop ? $orig : '';
+                    }
+                }
+
+            }se;
+
+            if ( not $stop ) {
+                $output .= $words;
+                $words = '';
+            }
+
+            $words;
+
+        }se;
+
+        if ( not $stop ) {
+            $line =~ tr/\r\n//d;
+            $output .= $line . $sep;
+            $line = '';
+            $sep = '';
+        }
+
+        $line . $sep;
+
+    }se;
+
+    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    return $output;
+}
+
+sub _decode_b {
+    my ($enc, $text, $chk) = @_;
+    # MIME::Base64::decode ignores everything after a '=' padding character
+    # in non strict mode split string after each sequence of padding characters and decode each substring
+    my $octets = $STRICT_DECODE ?
+        MIME::Base64::decode($text) :
+        join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
+    return _decode_octets($enc, $octets, $chk);
+}
+
+sub _decode_q {
+    my ($enc, $text, $chk) = @_;
+    $text =~ s/_/ /go;
+    $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
+    return _decode_octets($enc, $text, $chk);
+}
+
+sub _decode_octets {
+    my ($enc, $octets, $chk) = @_;
+    $chk = 0 unless defined $chk;
+    $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
+    my $output = $enc->decode($octets, $chk);
+    return undef if not ref $chk and $chk and $octets ne '';
+    return $output;
+}
+
+sub encode($$;$) {
+    my ($obj, $str, $chk) = @_;
+    return undef unless defined $str;
+    my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
+    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    return $output . substr($str, 0, 0); # to propagate taintedness
+}
+
+sub _fold_line {
+    my ($obj, $line) = @_;
+    my $bpl = $obj->{bpl};
+    my $output = '';
+
+    while ( length($line) ) {
+        if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
+            $output .= $1;
+            $output .= "\r\n" . $2 if length($line);
+        } elsif ( $line =~ s/(\s)(.*)$// ) {
+            $output .= $line;
+            $line = $2;
+            $output .= "\r\n" . $1 if length($line);
+        } else {
+            $output .= $line;
+            last;
+        }
+    }
+
+    return $output;
+}
+
+sub _encode_string {
+    my ($obj, $str, $chk) = @_;
+    my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
+    my $enc = Encode::find_mime_encoding($obj->{charset});
+    my $enc_chk = $chk;
+    $enc_chk = 0 unless defined $enc_chk;
+    $enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
+    my @result = ();
+    my $octets = '';
+    while ( length( my $chr = substr($str, 0, 1, '') ) ) {
+        my $seq = $enc->encode($chr, $enc_chk);
+        if ( not length($seq) ) {
+            substr($str, 0, 0, $chr);
+            last;
+        }
+        if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
+            push @result, $obj->_encode_word($octets);
+            $octets = '';
+        }
+        $octets .= $seq;
+    }
+    length($octets) and push @result, $obj->_encode_word($octets);
+    $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+    return join(' ', @result);
+}
+
+sub _encode_word {
+    my ($obj, $octets) = @_;
+    my $charset = $obj->{charset};
+    my $encode = $obj->{encode};
+    my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
+    return "=?$charset?$encode?$text?=";
+}
+
+sub _encoded_word_len {
+    my ($obj, $octets) = @_;
+    my $charset = $obj->{charset};
+    my $encode = $obj->{encode};
+    my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
+    return length("=?$charset?$encode??=") + $text_len;
+}
+
+sub _encode_b {
+    my ($octets) = @_;
+    return MIME::Base64::encode($octets, '');
+}
+
+sub _encoded_b_len {
+    my ($octets) = @_;
+    return ( length($octets) + 2 ) / 3 * 4;
+}
+
+my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
+
+sub _encode_q {
+    my ($octets) = @_;
+    $octets =~ s{($re_invalid_q_char)}{
+        join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
+    }egox;
+    $octets =~ s/ /_/go;
+    return $octets;
+}
+
+sub _encoded_q_len {
+    my ($octets) = @_;
+    my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
+    return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::MIME::Header -- MIME encoding for an unstructured email header
+
+=head1 SYNOPSIS
+
+    use Encode qw(encode decode);
+
+    my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}");
+    # $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?="
+
+    my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}");
+    # $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?="
+
+    my $str = decode("MIME-Header",
+        "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " .
+        "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
+    );
+    # $str is "If you can read this you understand the example."
+
+    use Encode qw(decode :fallbacks);
+    use Encode::MIME::Header;
+    local $Encode::MIME::Header::STRICT_DECODE = 1;
+    my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK);
+    # use strict decoding and croak on errors
+
+=head1 ABSTRACT
+
+This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME
+encoding for an unstructured field body of the email header.  It can also be
+used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token.  However,
+it cannot be used directly for the whole header with the field name or for the
+structured header fields like From, To, Cc, Message-Id, etc...  There are 3
+encoding names supported by this module: C<MIME-Header>, C<MIME-B> and
+C<MIME-Q>.
+
+=head1 DESCRIPTION
+
+Decode method takes an unstructured field body of the email header (or
+L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and
+decodes each MIME encoded-word from input string to a sequence of bytes
+according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>.  Subsequently, each sequence
+of bytes with the corresponding MIME charset is decoded with
+L<the Encode module|Encode> and finally, one output string is returned.  Text
+parts of the input string which do not contain MIME encoded-word stay
+unmodified in the output string.  Folded newlines between two consecutive MIME
+encoded-words are discarded, others are preserved in the output string.
+C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable
+variant and C<MIME-Header> can decode both of them.  If L<Encode module|Encode>
+does not support particular MIME charset or chosen variant then an action based
+on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the
+MIME encoded-word is not decoded).
+
+Encode method takes a scalar string as its input and uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8
+bytes.  Then a sequence of UTF-8 bytes is encoded into MIME encoded-words
+(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a
+Quoted-Printable variant) where each MIME encoded-word is limited to 75
+characters.  MIME encoded-words are separated by C<CRLF SPACE> and joined to
+one output string.  Output string is suitable for unstructured field body of
+the email header.
+
+Both encode and decode methods propagate
+L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the
+MIME charset.
+
+=head1 BUGS
+
+Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder
+and encoder.  The MIME encoder infamously inserted additional spaces or
+discarded white spaces between consecutive MIME encoded-words, which led to
+invalid MIME headers produced by this module.  The MIME decoder had a tendency
+to discard white spaces, incorrectly interpret data or attempt to decode Base64
+MIME encoded-words as Quoted-Printable.  These problems were fixed in version
+2.22.  It is highly recommended not to use any version prior 2.22!
+
+Versions prior to 2.24 (part of Encode 2.87) ignored
+L<CHECK flags|Encode/Handling Malformed Data>.  The MIME encoder used
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode
+strings which could lead to invalid UTF-8 sequences.  MIME decoder used also
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally
+called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified
+L<CHECK flags|Encode/Handling Malformed Data> were ignored).  Moreover, it
+automatically croaked when a MIME encoded-word contained unknown encoding.
+Since version 2.24, this module uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder.  And
+L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated.
+
+Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully
+compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>.  Due to the aforementioned
+bugs in previous versions of the MIME encoder, there is a I<less strict>
+compatible mode for the MIME decoder which is used by default.  It should be
+able to decode MIME encoded-words encoded by pre 2.22 versions of this module.
+However, note that this is not correct according to
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>.
+
+In default I<not strict> mode the MIME decoder attempts to decode every substring
+which looks like a MIME encoded-word.  Therefore, the MIME encoded-words do not
+need to be separated by white space.  To enforce a correct I<strict> mode, set
+variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g. by localizing:
+
+  use Encode::MIME::Header;
+  local $Encode::MIME::Header::STRICT_DECODE = 1;
+
+=head1 AUTHORS
+
+Pali E<lt>pali@cpan.orgE<gt>
+
+=head1 SEE ALSO
+
+L<Encode>,
+L<RFC 822|https://tools.ietf.org/html/rfc822>,
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>,
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>
+
+=cut
--- a/database/perl/lib/Encode/MIME/Header/ISO_2022_JP.pm
+++ b/database/perl/lib/Encode/MIME/Header/ISO_2022_JP.pm
@@ -0,0 +1,133 @@
+package Encode::MIME::Header::ISO_2022_JP;
+
+use strict;
+use warnings;
+
+use parent qw(Encode::MIME::Header);
+
+my $obj =
+  bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
+  __PACKAGE__;
+Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP');
+
+use constant HEAD => '=?ISO-2022-JP?B?';
+use constant TAIL => '?=';
+
+use Encode::CJKConstants qw(%RE);
+
+our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+# I owe the below codes totally to
+#   Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
+
+sub encode {
+    my $self = shift;
+    my $str  = shift;
+    return undef unless defined $str;
+
+    utf8::encode($str) if ( Encode::is_utf8($str) );
+    Encode::from_to( $str, 'utf8', 'euc-jp' );
+
+    my ($trailing_crlf) = ( $str =~ /(\n|\r|\x0d\x0a)$/o );
+
+    $str = _mime_unstructured_header( $str, $self->{bpl} );
+
+    not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
+
+    return $str;
+}
+
+sub _mime_unstructured_header {
+    my ( $oldheader, $bpl ) = @_;
+    my $crlf = $oldheader =~ /\n$/;
+    my ( $header, @words, @wordstmp, $i ) = ('');
+
+    $oldheader =~ s/\s+$//;
+
+    @wordstmp = split /\s+/, $oldheader;
+
+    for ( $i = 0 ; $i < $#wordstmp ; $i++ ) {
+        if (    $wordstmp[$i] !~ /^[\x21-\x7E]+$/
+            and $wordstmp[ $i + 1 ] !~ /^[\x21-\x7E]+$/ )
+        {
+            $wordstmp[ $i + 1 ] = "$wordstmp[$i] $wordstmp[$i + 1]";
+        }
+        else {
+            push( @words, $wordstmp[$i] );
+        }
+    }
+
+    push( @words, $wordstmp[-1] );
+
+    for my $word (@words) {
+        if ( $word =~ /^[\x21-\x7E]+$/ ) {
+            $header =~ /(?:.*\n)*(.*)/;
+            if ( length($1) + length($word) > $bpl ) {
+                $header .= "\n $word";
+            }
+            else {
+                $header .= $word;
+            }
+        }
+        else {
+            $header = _add_encoded_word( $word, $header, $bpl );
+        }
+
+        $header =~ /(?:.*\n)*(.*)/;
+
+        if ( length($1) == $bpl ) {
+            $header .= "\n ";
+        }
+        else {
+            $header .= ' ';
+        }
+    }
+
+    $header =~ s/\n? $//mg;
+
+    $crlf ? "$header\n" : $header;
+}
+
+sub _add_encoded_word {
+    my ( $str, $line, $bpl ) = @_;
+    my $result = '';
+
+    while ( length($str) ) {
+        my $target = $str;
+        $str = '';
+
+        if (
+            length($line) + 22 +
+            ( $target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o ) * 8 > $bpl )
+        {
+            $line =~ s/[ \t\n\r]*$/\n/;
+            $result .= $line;
+            $line = ' ';
+        }
+
+        while (1) {
+            my $iso_2022_jp = $target;
+            Encode::from_to( $iso_2022_jp, 'euc-jp', 'iso-2022-jp' );
+
+            my $encoded =
+              HEAD . MIME::Base64::encode_base64( $iso_2022_jp, '' ) . TAIL;
+
+            if ( length($encoded) + length($line) > $bpl ) {
+                $target =~
+                  s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
+                $str = $1 . $str;
+            }
+            else {
+                $line .= $encoded;
+                last;
+            }
+        }
+
+    }
+
+    $result . $line;
+}
+
+1;
+__END__
+
--- a/database/perl/lib/Encode/MIME/Name.pm
+++ b/database/perl/lib/Encode/MIME/Name.pm
@@ -0,0 +1,103 @@
+package Encode::MIME::Name;
+use strict;
+use warnings;
+our $VERSION = do { my @r = ( q$Revision: 1.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+# NOTE: This table must be 1:1 mapping
+our %MIME_NAME_OF = (
+    'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
+    'AdobeSymbol'           => 'Adobe-Symbol-Encoding',
+    'ascii'                 => 'US-ASCII',
+    'big5-hkscs'            => 'Big5-HKSCS',
+    'cp1026'                => 'IBM1026',
+    'cp1047'                => 'IBM1047',
+    'cp1250'                => 'windows-1250',
+    'cp1251'                => 'windows-1251',
+    'cp1252'                => 'windows-1252',
+    'cp1253'                => 'windows-1253',
+    'cp1254'                => 'windows-1254',
+    'cp1255'                => 'windows-1255',
+    'cp1256'                => 'windows-1256',
+    'cp1257'                => 'windows-1257',
+    'cp1258'                => 'windows-1258',
+    'cp37'                  => 'IBM037',
+    'cp424'                 => 'IBM424',
+    'cp437'                 => 'IBM437',
+    'cp500'                 => 'IBM500',
+    'cp775'                 => 'IBM775',
+    'cp850'                 => 'IBM850',
+    'cp852'                 => 'IBM852',
+    'cp855'                 => 'IBM855',
+    'cp857'                 => 'IBM857',
+    'cp860'                 => 'IBM860',
+    'cp861'                 => 'IBM861',
+    'cp862'                 => 'IBM862',
+    'cp863'                 => 'IBM863',
+    'cp864'                 => 'IBM864',
+    'cp865'                 => 'IBM865',
+    'cp866'                 => 'IBM866',
+    'cp869'                 => 'IBM869',
+    'cp936'                 => 'GBK',
+    'euc-cn'                => 'EUC-CN',
+    'euc-jp'                => 'EUC-JP',
+    'euc-kr'                => 'EUC-KR',
+    #'gb2312-raw'            => 'GB2312', # no, you're wrong, I18N::Charset
+    'hp-roman8'             => 'hp-roman8',
+    'hz'                    => 'HZ-GB-2312',
+    'iso-2022-jp'           => 'ISO-2022-JP',
+    'iso-2022-jp-1'         => 'ISO-2022-JP-1',
+    'iso-2022-kr'           => 'ISO-2022-KR',
+    'iso-8859-1'            => 'ISO-8859-1',
+    'iso-8859-10'           => 'ISO-8859-10',
+    'iso-8859-13'           => 'ISO-8859-13',
+    'iso-8859-14'           => 'ISO-8859-14',
+    'iso-8859-15'           => 'ISO-8859-15',
+    'iso-8859-16'           => 'ISO-8859-16',
+    'iso-8859-2'            => 'ISO-8859-2',
+    'iso-8859-3'            => 'ISO-8859-3',
+    'iso-8859-4'            => 'ISO-8859-4',
+    'iso-8859-5'            => 'ISO-8859-5',
+    'iso-8859-6'            => 'ISO-8859-6',
+    'iso-8859-7'            => 'ISO-8859-7',
+    'iso-8859-8'            => 'ISO-8859-8',
+    'iso-8859-9'            => 'ISO-8859-9',
+    #'jis0201-raw'           => 'JIS_X0201',
+    #'jis0208-raw'           => 'JIS_C6226-1983',
+    #'jis0212-raw'           => 'JIS_X0212-1990',
+    'koi8-r'                => 'KOI8-R',
+    'koi8-u'                => 'KOI8-U',
+    #'ksc5601-raw'           => 'KS_C_5601-1987',
+    'shiftjis'              => 'Shift_JIS',
+    'UTF-16'                => 'UTF-16',
+    'UTF-16BE'              => 'UTF-16BE',
+    'UTF-16LE'              => 'UTF-16LE',
+    'UTF-32'                => 'UTF-32',
+    'UTF-32BE'              => 'UTF-32BE',
+    'UTF-32LE'              => 'UTF-32LE',
+    'UTF-7'                 => 'UTF-7',
+    'utf-8-strict'          => 'UTF-8',
+    'viscii'                => 'VISCII',
+);
+
+# NOTE: %MIME_NAME_OF is still 1:1 mapping
+our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF;
+
+# Add additional 1:N mapping
+$MIME_NAME_OF{'utf8'} = 'UTF-8';
+
+sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
+
+sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} };
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::MIME::NAME -- internally used by Encode
+
+=head1 SEE ALSO
+
+L<I18N::Charset>
+
+=cut
--- a/database/perl/lib/Encode/Makefile_PL.e2x
+++ b/database/perl/lib/Encode/Makefile_PL.e2x
@@ -0,0 +1,190 @@
+#
+# This file is auto-generated by:
+# enc2xs version $_Version_
+# $_Now_
+#
+use 5.7.2;
+use strict;
+use ExtUtils::MakeMaker;
+use Config;
+
+# Please edit the following to the taste!
+my $name = '$_Name_';
+my %tables = (
+         $_Name__t   => [ $_TableFiles_ ],
+         );
+
+#### DO NOT EDIT BEYOND THIS POINT!
+require File::Spec;
+my ($enc2xs, $encode_h) = ();
+my @path_ext = ('');
+@path_ext = split(';', $ENV{PATHEXT}) if $^O eq 'MSWin32';
+PATHLOOP:
+for my $d (@Config{qw/bin sitebin vendorbin/}, 
+       (split /$Config{path_sep}/o, $ENV{PATH})){
+    for my $f (qw/enc2xs enc2xs5.7.3/){
+        my $path = File::Spec->catfile($d, $f);
+        for my $ext (@path_ext) {
+            my $bin = "$path$ext";
+            -r "$bin" and $enc2xs = $bin and last PATHLOOP;
+        }
+    }
+}
+$enc2xs or die "enc2xs not found!";
+print "enc2xs is $enc2xs\n";
+my %encode_h = ();
+for my $d (@INC){
+    my $dir = File::Spec->catfile($d, "Encode");
+    my $file =  File::Spec->catfile($dir, "encode.h");
+    -f $file and $encode_h{$dir} = -M $file;
+}
+%encode_h or die "encode.h not found!";
+# find the latest one
+($encode_h) = sort {$encode_h{$b} <=> $encode_h{$a}} keys %encode_h;
+print "encode.h is at $encode_h\n";
+
+WriteMakefile(
+              INC		=> "-I$encode_h",
+#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! ####
+          NAME		=> 'Encode::'.$name,
+          VERSION_FROM	=> "$name.pm",
+          OBJECT		=> '$(O_FILES)',
+          'dist'		=> {
+          COMPRESS	=> 'gzip -9f',
+          SUFFIX	=> 'gz',
+          DIST_DEFAULT => 'all tardist',
+          },
+          MAN3PODS	=> {},
+          PREREQ_PM => {
+                'Encode'     => "1.41",
+                           },
+          # OS 390 winges about line numbers > 64K ???
+          XSOPT => '-nolinenumbers',
+          );
+
+package MY;
+
+sub post_initialize
+{
+    my ($self) = @_;
+    my %o;
+    my $x = $self->{'OBJ_EXT'};
+    # Add the table O_FILES
+    foreach my $e (keys %tables)
+    {
+    $o{$e.$x} = 1;
+    }
+    $o{"$name$x"} = 1;
+    $self->{'O_FILES'} = [sort keys %o];
+    my @files = ("$name.xs");
+    $self->{'C'} = ["$name.c"];
+    # The next two lines to make MacPerl Happy -- dankogai via pudge
+    $self->{SOURCE} .= " $name.c"
+        if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$name\.c\b/;
+    # $self->{'H'} = [$self->catfile($self->updir,'encode.h')];
+    my %xs;
+    foreach my $table (sort keys %tables) {
+    push (@{$self->{'C'}},"$table.c");
+    # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+    # get built.
+    foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+        push (@files,$table.$ext);
+    }
+    }
+    $self->{'XS'} = { "$name.xs" => "$name.c" };
+    $self->{'clean'}{'FILES'} .= join(' ',@files);
+    open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
+    print XS <<'END';
+#include <EXTERN.h>
+#include <perl.h>
+#include <XSUB.h>
+#include "encode.h"
+END
+    foreach my $table (sort keys %tables) {
+    print XS qq[#include "${table}.h"\n];
+    }
+    print XS <<"END";
+
+static void
+Encode_XSEncoding(pTHX_ encode_t *enc)
+{
+ dSP;
+ HV *stash = gv_stashpv("Encode::XS", TRUE);
+ SV *iv    = newSViv(PTR2IV(enc));
+ SV *sv    = sv_bless(newRV_noinc(iv),stash);
+ int i = 0;
+ /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
+ constness, in the hope that perl won't mess with it. */
+ assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
+ SvFLAGS(iv) |= SVp_POK;
+ SvPVX(iv) = (char*) enc->name[0];
+ PUSHMARK(sp);
+ XPUSHs(sv);
+ while (enc->name[i])
+  {
+   const char *name = enc->name[i++];
+   XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
+  }
+ PUTBACK;
+ call_pv("Encode::define_encoding",G_DISCARD);
+ SvREFCNT_dec(sv);
+}
+
+MODULE = Encode::$name	PACKAGE = Encode::$name
+PROTOTYPES: DISABLE
+BOOT:
+{
+END
+    foreach my $table (sort keys %tables) {
+    print XS qq[#include "${table}.exh"\n];
+    }
+    print XS "}\n";
+    close(XS);
+    return "# Built $name.xs\n\n";
+}
+
+sub postamble
+{
+    my $self = shift;
+    my $dir  = "."; # $self->catdir('Encode');
+    my $str  = "# $name\$(OBJ_EXT) depends on .h and .exh files not .c files - but all written by enc2xs\n";
+    $str    .= "$name.c : $name.xs ";
+    foreach my $table (sort keys %tables)
+    {
+    $str .= " $table.c";
+    }
+    $str .= "\n\n";
+    $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
+
+    foreach my $table (sort keys %tables)
+    {
+    my $numlines = 1;
+    my $lengthsofar = length($str);
+    my $continuator = '';
+    $str .= "$table.c : Makefile.PL";
+    foreach my $file (@{$tables{$table}})
+    {
+        $str .= $continuator.' '.$self->catfile($dir,$file);
+        if ( length($str)-$lengthsofar > 128*$numlines )
+        {
+        $continuator .= " \\\n\t";
+        $numlines++;
+        } else {
+        $continuator = '';
+        }
+    }
+    my $plib   = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+    my $ucopts = '-"Q"';
+    $str .=  
+        qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+    open (FILELIST, ">$table.fnm")
+        || die "Could not open $table.fnm: $!";
+    foreach my $file (@{$tables{$table}})
+    {
+        print FILELIST $self->catfile($dir,$file) . "\n";
+    }
+    close(FILELIST);
+    }
+    return $str;
+}
+
--- a/database/perl/lib/Encode/PerlIO.pod
+++ b/database/perl/lib/Encode/PerlIO.pod
@@ -0,0 +1,167 @@
+=head1 NAME
+
+Encode::PerlIO -- a detailed document on Encode and PerlIO
+
+=head1 Overview
+
+It is very common to want to do encoding transformations when
+reading or writing files, network connections, pipes etc.
+If Perl is configured to use the new 'perlio' IO system then
+C<Encode> provides a "layer" (see L<PerlIO>) which can transform
+data as it is read or written.
+
+Here is how the blind poet would modernise the encoding:
+
+    use Encode;
+    open(my $iliad,'<:encoding(iso-8859-7)','iliad.greek');
+    open(my $utf8,'>:utf8','iliad.utf8');
+    my @epic = <$iliad>;
+    print $utf8 @epic;
+    close($utf8);
+    close($illiad);
+
+In addition, the new IO system can also be configured to read/write
+UTF-8 encoded characters (as noted above, this is efficient):
+
+    open(my $fh,'>:utf8','anything');
+    print $fh "Any \x{0021} string \N{SMILEY FACE}\n";
+
+Either of the above forms of "layer" specifications can be made the default
+for a lexical scope with the C<use open ...> pragma. See L<open>.
+
+Once a handle is open, its layers can be altered using C<binmode>.
+
+Without any such configuration, or if Perl itself is built using the
+system's own IO, then write operations assume that the file handle
+accepts only I<bytes> and will C<die> if a character larger than 255 is
+written to the handle. When reading, each octet from the handle becomes
+a byte-in-a-character. Note that this default is the same behaviour
+as bytes-only languages (including Perl before v5.6) would have,
+and is sufficient to handle native 8-bit encodings e.g. iso-8859-1,
+EBCDIC etc. and any legacy mechanisms for handling other encodings
+and binary data.
+
+In other cases, it is the program's responsibility to transform
+characters into bytes using the API above before doing writes, and to
+transform the bytes read from a handle into characters before doing
+"character operations" (e.g. C<lc>, C</\W+/>, ...).
+
+You can also use PerlIO to convert larger amounts of data you don't
+want to bring into memory.  For example, to convert between ISO-8859-1
+(Latin 1) and UTF-8 (or UTF-EBCDIC in EBCDIC machines):
+
+    open(F, "<:encoding(iso-8859-1)", "data.txt") or die $!;
+    open(G, ">:utf8",                 "data.utf") or die $!;
+    while (<F>) { print G }
+
+    # Could also do "print G <F>" but that would pull
+    # the whole file into memory just to write it out again.
+
+More examples:
+
+    open(my $f, "<:encoding(cp1252)")
+    open(my $g, ">:encoding(iso-8859-2)")
+    open(my $h, ">:encoding(latin9)")       # iso-8859-15
+
+See also L<encoding> for how to change the default encoding of the
+data in your script.
+
+=head1 How does it work?
+
+Here is a crude diagram of how filehandle, PerlIO, and Encode
+interact.
+
+  filehandle <-> PerlIO        PerlIO <-> scalar (read/printed)
+                       \      /
+                        Encode   
+
+When PerlIO receives data from either direction, it fills a buffer
+(currently with 1024 bytes) and passes the buffer to Encode.
+Encode tries to convert the valid part and passes it back to PerlIO,
+leaving invalid parts (usually a partial character) in the buffer.
+PerlIO then appends more data to the buffer, calls Encode again,
+and so on until the data stream ends.
+
+To do so, PerlIO always calls (de|en)code methods with CHECK set to 1.
+This ensures that the method stops at the right place when it
+encounters partial character.  The following is what happens when
+PerlIO and Encode tries to encode (from utf8) more than 1024 bytes
+and the buffer boundary happens to be in the middle of a character.
+
+   A   B   C   ....   ~     \x{3000}    ....
+  41  42  43   ....  7E   e3   80   80  ....
+  <- buffer --------------->
+  << encoded >>>>>>>>>>
+                       <- next buffer ------
+
+Encode converts from the beginning to \x7E, leaving \xe3 in the buffer
+because it is invalid (partial character).
+
+Unfortunately, this scheme does not work well with escape-based
+encodings such as ISO-2022-JP.
+
+=head1 Line Buffering
+
+Now let's see what happens when you try to decode from ISO-2022-JP and
+the buffer ends in the middle of a character.
+
+              JIS208-ESC   \x{5f3e}
+   A   B   C   ....   ~   \e   $   B  |DAN | ....
+  41  42  43   ....  7E   1b  24  41  43  46 ....
+  <- buffer --------------------------->
+  << encoded >>>>>>>>>>>>>>>>>>>>>>>
+
+As you see, the next buffer begins with \x43.  But \x43 is 'C' in
+ASCII, which is wrong in this case because we are now in JISX 0208
+area so it has to convert \x43\x46, not \x43.  Unlike utf8 and EUC,
+in escape-based encodings you can't tell if a given octet is a whole
+character or just part of it.
+
+Fortunately PerlIO also supports line buffer if you tell PerlIO to use
+one instead of fixed buffer.  Since ISO-2022-JP is guaranteed to revert to ASCII at the end of the line, partial
+character will never happen when line buffer is used.
+
+To tell PerlIO to use line buffer, implement -E<gt>needs_lines method
+for your encoding object.  See  L<Encode::Encoding> for details.
+
+Thanks to these efforts most encodings that come with Encode support
+PerlIO but that still leaves following encodings.
+
+  iso-2022-kr
+  MIME-B
+  MIME-Header
+  MIME-Q
+
+Fortunately iso-2022-kr is hardly used (according to Jungshik) and
+MIME-* are very unlikely to be fed to PerlIO because they are for mail
+headers.  See L<Encode::MIME::Header> for details.
+
+=head2 How can I tell whether my encoding fully supports PerlIO ?
+
+As of this writing, any encoding whose class belongs to Encode::XS and
+Encode::Unicode works.  The Encode module has a C<perlio_ok> method
+which you can use before applying PerlIO encoding to the filehandle.
+Here is an example:
+
+  my $use_perlio = perlio_ok($enc);
+  my $layer = $use_perlio ? "<:raw" : "<:encoding($enc)";
+  open my $fh, $layer, $file or die "$file : $!";
+  while(<$fh>){
+    $_ = decode($enc, $_) unless $use_perlio;
+    # .... 
+  }
+
+=head1 SEE ALSO
+
+L<Encode::Encoding>,
+L<Encode::Supported>,
+L<Encode::PerlIO>, 
+L<encoding>,
+L<perlebcdic>, 
+L<perlfunc/open>, 
+L<perlunicode>, 
+L<utf8>, 
+the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
+
+=cut
+
--- a/database/perl/lib/Encode/README.e2x
+++ b/database/perl/lib/Encode/README.e2x
@@ -0,0 +1,31 @@
+Encode::$_Name_ version 0.1
+========
+
+NAME
+       Encode::$_Name_ - <describe encoding>
+
+SYNOPSIS
+       use Encode::$_Name_;
+       #<put more words here>
+ABSTRACT
+      <fill this in>
+INSTALLATION
+
+To install this module type the following:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+DEPENDENCIES
+
+This module requires perl version 5.7.3 or later.
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2002 Your Name <your@address.domain>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. 
+
--- a/database/perl/lib/Encode/Supported.pod
+++ b/database/perl/lib/Encode/Supported.pod
@@ -0,0 +1,901 @@
+=head1 NAME
+
+Encode::Supported -- Encodings supported by Encode
+
+=head1 DESCRIPTION
+
+=head2 Encoding Names
+
+Encoding names are case insensitive. White space in names
+is ignored.  In addition, an encoding may have aliases.
+Each encoding has one "canonical" name.  The "canonical"
+name is chosen from the names of the encoding by picking
+the first in the following sequence (with a few exceptions).
+
+=over 2
+
+=item *
+
+The name used by the Perl community.  That includes 'utf8' and 'ascii'.
+Unlike aliases, canonical names directly reach the method so such
+frequently used words like 'utf8' don't need to do alias lookups.
+
+=item *
+
+The MIME name as defined in IETF RFCs.  This includes all "iso-"s.
+
+=item * 
+
+The name in the IANA registry.
+
+=item *
+
+The name used by the organization that defined it.
+
+=back
+
+In case I<de jure> canonical names differ from that of the Encode
+module, they are always aliased if it ever be implemented.  So you can
+safely tell if a given encoding is implemented or not just by passing 
+the canonical name.
+
+Because of all the alias issues, and because in the general case 
+encodings have state, "Encode" uses an encoding object internally 
+once an operation is in progress.
+
+=head1 Supported Encodings
+
+As of Perl 5.8.0, at least the following encodings are recognized.
+Note that unless otherwise specified, they are all case insensitive
+(via alias) and all occurrence of spaces are replaced with '-'.
+In other words, "ISO 8859 1" and "iso-8859-1" are identical.
+
+Encodings are categorized and implemented in several different modules
+but you don't have to C<use Encode::XX> to make them available for
+most cases.  Encode.pm will automatically load those modules on demand.
+
+=head2 Built-in Encodings
+
+The following encodings are always available.
+
+  Canonical     Aliases                      Comments & References
+  ----------------------------------------------------------------
+  ascii         US-ascii ISO-646-US                         [ECMA]
+  ascii-ctrl			                  Special Encoding
+  iso-8859-1    latin1                                       [ISO]
+  null				                  Special Encoding
+  utf8          UTF-8                                    [RFC2279]
+  ----------------------------------------------------------------
+
+I<null> and I<ascii-ctrl> are special.  "null" fails for all character
+so when you set fallback mode to PERLQQ, HTMLCREF or XMLCREF, ALL
+CHARACTERS will fall back to character references.  Ditto for
+"ascii-ctrl" except for control characters.  For fallback modes, see
+L<Encode>.
+
+=head2 Encode::Unicode -- other Unicode encodings
+
+Unicode coding schemes other than native utf8 are supported by
+Encode::Unicode, which will be autoloaded on demand.
+
+  ----------------------------------------------------------------
+  UCS-2BE       UCS-2, iso-10646-1                      [IANA, UC]
+  UCS-2LE                                                     [UC]
+  UTF-16                                                      [UC]
+  UTF-16BE                                                    [UC]
+  UTF-16LE                                                    [UC]
+  UTF-32                                                      [UC]
+  UTF-32BE	UCS-4                                         [UC]
+  UTF-32LE                                                    [UC]
+  UTF-7                                                  [RFC2152]
+  ----------------------------------------------------------------
+
+To find how (UCS-2|UTF-(16|32))(LE|BE)? differ from one another,
+see L<Encode::Unicode>. 
+
+UTF-7 is a special encoding which "re-encodes" UTF-16BE into a 7-bit
+encoding.  It is implemented separately by Encode::Unicode::UTF7.
+
+=head2 Encode::Byte -- Extended ASCII
+
+Encode::Byte implements most single-byte encodings except for
+Symbols and EBCDIC. The following encodings are based on single-byte
+encodings implemented as extended ASCII.  Most of them map
+\x80-\xff (upper half) to non-ASCII characters.
+
+=over 2
+
+=item ISO-8859 and corresponding vendor mappings
+
+Since there are so many, they are presented in table format with
+languages and corresponding encoding names by vendors.  Note that
+the table is sorted in order of ISO-8859 and the corresponding vendor
+mappings are slightly different from that of ISO.  See
+L<http://czyborra.com/charsets/iso8859.html> for details.
+
+  Lang/Regions  ISO/Other Std.  DOS     Windows Macintosh  Others
+  ----------------------------------------------------------------
+  N. America    (ASCII)         cp437        AdobeStandardEncoding
+                                cp863 (DOSCanadaF)
+  W. Europe     iso-8859-1      cp850   cp1252  MacRoman  nextstep
+                                                         hp-roman8
+                                cp860 (DOSPortuguese)
+  Cntrl. Europe iso-8859-2      cp852   cp1250  MacCentralEurRoman
+                                                MacCroatian
+                                                MacRomanian
+                                                MacRumanian
+  Latin3[1]     iso-8859-3      
+  Latin4[2]     iso-8859-4              
+  Cyrillics     iso-8859-5      cp855   cp1251  MacCyrillic
+    (See also next section)     cp866           MacUkrainian
+  Arabic        iso-8859-6      cp864   cp1256  MacArabic
+                                cp1006          MacFarsi
+  Greek         iso-8859-7      cp737   cp1253  MacGreek
+                                cp869 (DOSGreek2)
+  Hebrew        iso-8859-8      cp862   cp1255  MacHebrew
+  Turkish       iso-8859-9      cp857   cp1254  MacTurkish
+  Nordics       iso-8859-10     cp865
+                                cp861           MacIcelandic
+                                                MacSami
+  Thai          iso-8859-11[3]  cp874           MacThai
+  (iso-8859-12 is nonexistent. Reserved for Indics?)
+  Baltics       iso-8859-13     cp775           cp1257
+  Celtics       iso-8859-14
+  Latin9 [4]    iso-8859-15
+  Latin10       iso-8859-16
+  Vietnamese    viscii                  cp1258  MacVietnamese
+  ----------------------------------------------------------------
+
+  [1] Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.
+  [2] Baltics.  Now on 8859-10, except for Latvian.
+  [3] TIS 620 +  Non-Breaking Space (0xA0 / U+00A0)
+  [4] Nicknamed Latin0; the Euro sign as well as French and Finnish
+      letters that are missing from 8859-1 were added.
+
+All cp* are also available as ibm-*, ms-*, and windows-* .  See also
+L<http://czyborra.com/charsets/codepages.html>.
+
+Macintosh encodings don't seem to be registered in such entities as
+IANA.  "Canonical" names in Encode are based upon Apple's Tech Note
+1150.  See L<http://developer.apple.com/technotes/tn/tn1150.html> 
+for details.
+
+=item KOI8 - De Facto Standard for the Cyrillic world
+
+Though ISO-8859 does have ISO-8859-5, the KOI8 series is far more
+popular in the Net.   L<Encode> comes with the following KOI charsets.
+For gory details, see L<http://czyborra.com/charsets/cyrillic.html>
+
+  ----------------------------------------------------------------
+  koi8-f                                        
+  koi8-r cp878                                           [RFC1489]
+  koi8-u                                                 [RFC2319]
+  ----------------------------------------------------------------
+
+=back
+
+=head2 gsm0338 - Hentai Latin 1
+
+GSM0338 is for GSM handsets. Though it shares alphanumerals with
+ASCII, control character ranges and other parts are mapped very
+differently, mainly to store Greek characters.  There are also escape
+sequences (starting with 0x1B) to cover e.g. the Euro sign.  
+
+This was once handled by L<Encode::Bytes> but because of all those
+unusual specifications, Encode 2.20 has relocated the support to
+L<Encode::GSM0338>. See L<Encode::GSM0338> for details.
+
+=over 2
+
+=item gsm0338 support before 2.19
+
+Some special cases like a trailing 0x00 byte or a lone 0x1B byte are not
+well-defined and decode() will return an empty string for them.
+One possible workaround is
+
+   $gsm =~ s/\x00\z/\x00\x00/;
+   $uni = decode("gsm0338", $gsm);
+   $uni .= "\xA0" if $gsm =~ /\x1B\z/;
+
+Note that the Encode implementation of GSM0338 does not implement the
+reuse of Latin capital letters as Greek capital letters (for example,
+the 0x5A is U+005A (LATIN CAPITAL LETTER Z), not U+0396 (GREEK CAPITAL
+LETTER ZETA).
+
+The GSM0338 is also covered in Encode::Byte even though it is not
+an "extended ASCII" encoding.
+
+=back
+
+=head2 CJK: Chinese, Japanese, Korean (Multibyte)
+
+Note that Vietnamese is listed above.  Also read "Encoding vs Charset"
+below.  Also note that these are implemented in distinct modules by
+countries, due to the size concerns (simplified Chinese is mapped
+to 'CN', continental China, while traditional Chinese is mapped to
+'TW', Taiwan).  Please refer to their respective documentation pages.
+
+=over 2
+
+=item Encode::CN -- Continental China
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  euc-cn [1]            MacChineseSimp
+  (gbk)         cp936 [2]
+  gb12345-raw                      { GB12345 without CES }
+  gb2312-raw                       { GB2312  without CES }
+  hz
+  iso-ir-165
+  ----------------------------------------------------------------
+
+  [1] GB2312 is aliased to this.  See L<Microsoft-related naming mess>
+  [2] gbk is aliased to this.  See L<Microsoft-related naming mess>
+
+=item Encode::JP -- Japan
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  euc-jp
+  shiftjis      cp932   macJapanese
+  7bit-jis
+  iso-2022-jp                                            [RFC1468]
+  iso-2022-jp-1                                          [RFC2237]
+  jis0201-raw  { JIS X 0201 (roman + halfwidth kana) without CES }
+  jis0208-raw  { JIS X 0208 (Kanji + fullwidth kana) without CES }
+  jis0212-raw  { JIS X 0212 (Extended Kanji)         without CES }
+  ----------------------------------------------------------------
+
+=item Encode::KR -- Korea
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  euc-kr                MacKorean                        [RFC1557]
+                cp949 [1]                    
+  iso-2022-kr                                            [RFC1557]
+  johab                                  [KS X 1001:1998, Annex 3]
+  ksc5601-raw                              { KSC5601 without CES }
+  ----------------------------------------------------------------
+
+  [1] ks_c_5601-1987, (x-)?windows-949, and uhc are aliased to this.
+  See below.
+
+=item Encode::TW -- Taiwan
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  big5-eten     cp950   MacChineseTrad {big5 aliased to big5-eten}
+  big5-hkscs                              
+  ----------------------------------------------------------------
+
+=item Encode::HanExtra -- More Chinese via CPAN
+
+Due to the size concerns, additional Chinese encodings below are
+distributed separately on CPAN, under the name Encode::HanExtra.
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  big5ext                                   CMEX's Big5e Extension
+  big5plus                                  CMEX's Big5+ Extension
+  cccii         Chinese Character Code for Information Interchange
+  euc-tw                             EUC (Extended Unix Character)
+  gb18030                          GBK with Traditional Characters
+  ----------------------------------------------------------------
+
+=item Encode::JIS2K -- JIS X 0213 encodings via CPAN
+
+Due to size concerns, additional Japanese encodings below are
+distributed separately on CPAN, under the name Encode::JIS2K.
+
+  Standard      DOS/Win Macintosh                Comment/Reference
+  ----------------------------------------------------------------
+  euc-jisx0213
+  shiftjisx0123
+  iso-2022-jp-3
+  jis0213-1-raw
+  jis0213-2-raw
+  ----------------------------------------------------------------
+
+=back
+
+=head2 Miscellaneous encodings
+
+=over 2
+
+=item Encode::EBCDIC
+
+See L<perlebcdic> for details.
+
+  ----------------------------------------------------------------
+  cp37
+  cp500  
+  cp875  
+  cp1026  
+  cp1047  
+  posix-bc
+  ----------------------------------------------------------------
+
+=item Encode::Symbols
+
+For symbols  and dingbats.
+
+  ----------------------------------------------------------------
+  symbol
+  dingbats
+  MacDingbats
+  AdobeZdingbat
+  AdobeSymbol
+  ----------------------------------------------------------------
+
+=item Encode::MIME::Header
+
+Strictly speaking, MIME header encoding documented in RFC 2047 is more
+of encapsulation than encoding.  However, their support in modern
+world is imperative so they are supported.
+
+  ----------------------------------------------------------------
+  MIME-Header                                            [RFC2047]
+  MIME-B                                                 [RFC2047]
+  MIME-Q                                                 [RFC2047]
+  ----------------------------------------------------------------
+
+=item Encode::Guess
+
+This one is not a name of encoding but a utility that lets you pick up
+the most appropriate encoding for a data out of given I<suspects>.  See
+L<Encode::Guess> for details.
+
+=back
+
+=head1 Unsupported encodings
+
+The following encodings are not supported as yet; some because they
+are rarely used, some because of technical difficulties.  They may
+be supported by external modules via CPAN in the future, however.
+
+=over 2
+
+=item   ISO-2022-JP-2 [RFC1554]
+
+Not very popular yet.  Needs Unicode Database or equivalent to
+implement encode() (because it includes JIS X 0208/0212, KSC5601, and
+GB2312 simultaneously, whose code points in Unicode overlap.  So you
+need to lookup the database to determine to what character set a given
+Unicode character should belong). 
+
+=item ISO-2022-CN [RFC1922]
+
+Not very popular.  Needs CNS 11643-1 and -2 which are not available in
+this module.  CNS 11643 is supported (via euc-tw) in Encode::HanExtra.
+Audrey Tang may add support for this encoding in her module in future.
+
+=item Various HP-UX encodings
+
+The following are unsupported due to the lack of mapping data.
+
+  '8'  - arabic8, greek8, hebrew8, kana8, thai8, and turkish8
+  '15' - japanese15, korean15, and roi15
+
+=item Cyrillic encoding ISO-IR-111
+
+Anton Tagunov doubts its usefulness.
+
+=item ISO-8859-8-1 [Hebrew]
+
+None of the Encode team knows Hebrew enough (ISO-8859-8, cp1255 and
+MacHebrew are supported because and just because there were mappings
+available at L<http://www.unicode.org/>).  Contributions welcome.
+
+=item ISIRI 3342, Iran System, ISIRI 2900 [Farsi]
+
+Ditto.
+
+=item Thai encoding TCVN
+
+Ditto.
+
+=item Vietnamese encodings VPS
+
+Though Jungshik Shin has reported that Mozilla supports this encoding,
+it was too late before 5.8.0 for us to add it.  In the future, it
+may be available via a separate module.  See
+L<http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.uf>
+and
+L<http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.ut>
+if you are interested in helping us.
+
+=item Various Mac encodings
+
+The following are unsupported due to the lack of mapping data. 
+
+  MacArmenian,  MacBengali,   MacBurmese,   MacEthiopic
+  MacExtArabic, MacGeorgian,  MacKannada,   MacKhmer
+  MacLaotian,   MacMalayalam, MacMongolian, MacOriya
+  MacSinhalese, MacTamil,     MacTelugu,    MacTibetan
+  MacVietnamese
+
+The rest which are already available are based upon the vendor mappings
+at L<http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> .
+
+=item (Mac) Indic encodings
+
+The maps for the following are available at L<http://www.unicode.org/>
+but remain unsupported because those encodings need an algorithmical
+approach, currently unsupported by F<enc2xs>:
+
+  MacDevanagari
+  MacGurmukhi
+  MacGujarati
+
+For details, please see C<Unicode mapping issues and notes:> at
+L<http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/DEVANAGA.TXT> .
+
+I believe this issue is prevalent not only for Mac Indics but also in
+other Indic encodings, but the above were the only Indic encodings
+maps that I could find at L<http://www.unicode.org/> .
+
+=back
+
+=head1 Encoding vs. Charset -- terminology
+
+We are used to using the term (character) I<encoding> and I<character
+set> interchangeably.  But just as confusing the terms byte and
+character is dangerous and the terms should be differentiated when
+needed, we need to differentiate I<encoding> and I<character set>.
+
+To understand that, here is a description of how we make computers
+grok our characters.
+
+=over 2
+
+=item *
+
+First we start with which characters to include.  We call this
+collection of characters I<character repertoire>.
+
+=item *
+
+Then we have to give each character a unique ID so your computer can
+tell the difference between 'a' and 'A'.  This itemized character
+repertoire is now a I<character set>.
+
+=item *
+
+If your computer can grow the character set without further
+processing, you can go ahead and use it.  This is called a I<coded
+character set> (CCS) or I<raw character encoding>.  ASCII is used this
+way for most cases.
+
+=item *
+
+But in many cases, especially multi-byte CJK encodings, you have to
+tweak a little more.  Your network connection may not accept any data
+with the Most Significant Bit set, and your computer may not be able to
+tell if a given byte is a whole character or just half of it.  So you
+have to I<encode> the character set to use it.
+
+A I<character encoding scheme> (CES) determines how to encode a given
+character set, or a set of multiple character sets.  7bit ISO-2022 is
+an example of a CES.  You switch between character sets via I<escape
+sequences>.
+
+=back
+
+Technically, or mathematically, speaking, a character set encoded in
+such a CES that maps character by character may form a CCS.  EUC is such
+an example.  The CES of EUC is as follows:
+
+=over 2
+
+=item *
+
+Map ASCII unchanged.
+
+=item *
+
+Map such a character set that consists of 94 or 96 powered by N
+members by adding 0x80 to each byte.
+
+=item *
+
+You can also use 0x8e and 0x8f to indicate that the following sequence of
+characters belongs to yet another character set.  To each following byte
+is added the value 0x80.
+
+=back
+
+By carefully looking at the encoded byte sequence, you can find that the
+byte sequence conforms a unique number.  In that sense, EUC is a CCS
+generated by a CES above from up to four CCS (complicated?).  UTF-8
+falls into this category.  See L<perlUnicode/"UTF-8"> to find out how
+UTF-8 maps Unicode to a byte sequence.
+
+You may also have found out by now why 7bit ISO-2022 cannot comprise
+a CCS.  If you look at a byte sequence \x21\x21, you can't tell if
+it is two !'s or IDEOGRAPHIC SPACE.  EUC maps the latter to \xA1\xA1
+so you have no trouble differentiating between "!!". and S<"  ">.
+
+=head1 Encoding Classification (by Anton Tagunov and Dan Kogai)
+
+This section tries to classify the supported encodings by their 
+applicability for information exchange over the Internet and to 
+choose the most suitable aliases to name them in the context of 
+such communication.
+
+=over 2
+
+=item * 
+
+To (en|de)code encodings marked by C<(**)>, you need 
+C<Encode::HanExtra>, available from CPAN.
+
+=back
+
+Encoding names
+
+  US-ASCII    UTF-8    ISO-8859-*  KOI8-R
+  Shift_JIS   EUC-JP   ISO-2022-JP ISO-2022-JP-1
+  EUC-KR      Big5     GB2312
+
+are registered with IANA as preferred MIME names and may
+be used over the Internet.
+
+C<Shift_JIS> has been officialized by JIS X 0208:1997.
+L<Microsoft-related naming mess> gives details.
+
+C<GB2312> is the IANA name for C<EUC-CN>.
+See L<Microsoft-related naming mess> for details.
+
+C<GB_2312-80> I<raw> encoding is available as C<gb2312-raw>
+with Encode. See L<Encode::CN> for details.
+
+  EUC-CN
+  KOI8-U        [RFC2319]
+
+have not been registered with IANA (as of March 2002) but
+seem to be supported by major web browsers. 
+The IANA name for C<EUC-CN> is C<GB2312>.
+
+  KS_C_5601-1987
+
+is heavily misused.
+See L<Microsoft-related naming mess> for details.
+
+C<KS_C_5601-1987> I<raw> encoding is available as C<kcs5601-raw>
+with Encode. See L<Encode::KR> for details.
+
+  UTF-16 UTF-16BE UTF-16LE
+
+are IANA-registered C<charset>s. See [RFC 2781] for details.
+Jungshik Shin reports that UTF-16 with a BOM is well accepted
+by MS IE 5/6 and NS 4/6. Beware however that
+
+=over 2
+
+=item *
+
+C<UTF-16> support in any software you're going to be
+using/interoperating with has probably been less tested
+then C<UTF-8> support
+
+=item *
+
+C<UTF-8> coded data seamlessly passes traditional
+command piping (C<cat>, C<more>, etc.) while C<UTF-16> coded
+data is likely to cause confusion (with its zero bytes,
+for example)
+
+=item *
+
+it is beyond the power of words to describe the way HTML browsers
+encode non-C<ASCII> form data. To get a general impression, visit
+L<http://www.alanflavell.org.uk/charset/form-i18n.html>.
+While encoding of form data has stabilized for C<UTF-8> encoded pages
+(at least IE 5/6, NS 6, and Opera 6 behave consistently), be sure to
+expect fun (and cross-browser discrepancies) with C<UTF-16> encoded
+pages!
+
+=back
+
+The rule of thumb is to use C<UTF-8> unless you know what
+you're doing and unless you really benefit from using C<UTF-16>.
+
+  ISO-IR-165    [RFC1345]
+  VISCII
+  GB 12345
+  GB 18030 (**)  (see links below)
+  EUC-TW   (**)
+
+are totally valid encodings but not registered at IANA.
+The names under which they are listed here are probably the
+most widely-known names for these encodings and are recommended
+names.
+
+  BIG5PLUS (**)
+
+is a proprietary name. 
+
+=head2 Microsoft-related naming mess
+
+Microsoft products misuse the following names:
+
+=over 2
+
+=item KS_C_5601-1987
+
+Microsoft extension to C<EUC-KR>.
+
+Proper names: C<CP949>, C<UHC>, C<x-windows-949> (as used by Mozilla).
+
+See L<http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0033.html>
+for details.
+
+Encode aliases C<KS_C_5601-1987> to C<cp949> to reflect this common
+misusage. I<Raw> C<KS_C_5601-1987> encoding is available as
+C<kcs5601-raw>.
+
+See L<Encode::KR> for details.
+
+=item GB2312
+
+Microsoft extension to C<EUC-CN>.
+
+Proper names: C<CP936>, C<GBK>.
+
+C<GB2312> has been registered in the C<EUC-CN> meaning at
+IANA. This has partially repaired the situation: Microsoft's 
+C<GB2312> has become a superset of the official C<GB2312>.
+
+Encode aliases C<GB2312> to C<euc-cn> in full agreement with
+IANA registration. C<cp936> is supported separately.
+I<Raw> C<GB_2312-80> encoding is available as C<gb2312-raw>.
+
+See L<Encode::CN> for details.
+
+=item Big5
+
+Microsoft extension to C<Big5>.
+
+Proper name: C<CP950>.
+
+Encode separately supports C<Big5> and C<cp950>.
+
+=item Shift_JIS
+
+Microsoft's understanding of C<Shift_JIS>.
+
+JIS has not endorsed the full Microsoft standard however.
+The official C<Shift_JIS> includes only JIS X 0201 and JIS X 0208
+character sets, while Microsoft has always used C<Shift_JIS>
+to encode a wider character repertoire. See C<IANA> registration for
+C<Windows-31J>.
+
+As a historical predecessor, Microsoft's variant
+probably has more rights for the name, though it may be objected
+that Microsoft shouldn't have used JIS as part of the name
+in the first place.
+
+Unambiguous name: C<CP932>. C<IANA> name (also used by Mozilla, and
+provided as an alias by Encode): C<Windows-31J>.
+
+Encode separately supports C<Shift_JIS> and C<cp932>.
+
+=back
+
+=head1 Glossary
+
+=over 2
+
+=item character repertoire
+
+A collection of unique characters.  A I<character> set in the strictest
+sense. At this stage, characters are not numbered.
+
+=item coded character set (CCS)
+
+A character set that is mapped in a way computers can use directly.
+Many character encodings, including EUC, fall in this category.
+
+=item character encoding scheme (CES)
+
+An algorithm to map a character set to a byte sequence.  You don't
+have to be able to tell which character set a given byte sequence
+belongs.  7-bit ISO-2022 is a CES but it cannot be a CCS.  EUC is an
+example of being both a CCS and CES.
+
+=item charset (in MIME context)
+
+has long been used in the meaning of C<encoding>, CES.
+
+While the word combination C<character set> has lost this meaning
+in MIME context since [RFC 2130], the C<charset> abbreviation has
+retained it. This is how [RFC 2277] and [RFC 2278] bless C<charset>:
+
+ This document uses the term "charset" to mean a set of rules for
+ mapping from a sequence of octets to a sequence of characters, such
+ as the combination of a coded character set and a character encoding
+ scheme; this is also what is used as an identifier in MIME "charset="
+ parameters, and registered in the IANA charset registry ...  (Note
+ that this is NOT a term used by other standards bodies, such as ISO).
+ [RFC 2277]
+
+=item EUC
+
+Extended Unix Character.  See ISO-2022.
+
+=item ISO-2022
+
+A CES that was carefully designed to coexist with ASCII.  There are a 7
+bit version and an 8 bit version.  
+
+The 7 bit version switches character set via escape sequence so it
+cannot form a CCS.  Since this is more difficult to handle in programs
+than the 8 bit version, the 7 bit version is not very popular except for
+iso-2022-jp, the I<de facto> standard CES for e-mails.
+
+The 8 bit version can form a CCS.  EUC and ISO-8859 are two examples
+thereof.  Pre-5.6 perl could use them as string literals.
+
+=item UCS
+
+Short for I<Universal Character Set>.  When you say just UCS, it means
+I<Unicode>.
+
+=item UCS-2
+
+ISO/IEC 10646 encoding form: Universal Character Set coded in two
+octets.
+
+=item Unicode
+
+A character set that aims to include all character repertoires of the
+world.  Many character sets in various national as well as industrial
+standards have become, in a way, just subsets of Unicode.
+
+=item UTF
+
+Short for I<Unicode Transformation Format>.  Determines how to map a
+Unicode character into a byte sequence.
+
+=item UTF-16
+
+A UTF in 16-bit encoding.  Can either be in big endian or little
+endian.  The big endian version is called UTF-16BE (equal to UCS-2 + 
+surrogate support) and the little endian version is called UTF-16LE.
+
+=back
+
+=head1 See Also
+
+L<Encode>, 
+L<Encode::Byte>, 
+L<Encode::CN>, L<Encode::JP>, L<Encode::KR>, L<Encode::TW>,
+L<Encode::EBCDIC>, L<Encode::Symbol>
+L<Encode::MIME::Header>, L<Encode::Guess>
+
+=head1 References
+
+=over 2
+
+=item ECMA
+
+European Computer Manufacturers Association
+L<http://www.ecma.ch>
+
+=over 2
+
+=item ECMA-035 (eq C<ISO-2022>)
+
+L<http://www.ecma.ch/ecma1/STAND/ECMA-035.HTM> 
+
+The specification of ISO-2022 is available from the link above.
+
+=back
+
+=item IANA
+
+Internet Assigned Numbers Authority
+L<http://www.iana.org/>
+
+=over 2
+
+=item Assigned Charset Names by IANA
+
+L<http://www.iana.org/assignments/character-sets>
+
+Most of the C<canonical names> in Encode derive from this list
+so you can directly apply the string you have extracted from MIME
+header of mails and web pages.
+
+=back
+
+=item ISO
+
+International Organization for Standardization
+L<http://www.iso.ch/>
+
+=item RFC
+
+Request For Comments -- need I say more?
+L<http://www.rfc-editor.org/>, L<http://www.ietf.org/rfc.html>,
+L<http://www.faqs.org/rfcs/>
+
+=item UC
+
+Unicode Consortium
+L<http://www.unicode.org/>
+
+=over 2
+
+=item Unicode Glossary
+
+L<http://www.unicode.org/glossary/>
+
+The glossary of this document is based upon this site.
+
+=back
+
+=back
+
+=head2 Other Notable Sites
+
+=over 2
+
+=item czyborra.com
+
+L<http://czyborra.com/>
+
+Contains a lot of useful information, especially gory details of ISO
+vs. vendor mappings.
+
+=item CJK.inf
+
+L<http://examples.oreilly.com/cjkvinfo/doc/cjk.inf>
+
+Somewhat obsolete (last update in 1996), but still useful.  Also try
+
+L<ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/pdf/GB18030_Summary.pdf>
+
+You will find brief info on C<EUC-CN>, C<GBK> and mostly on C<GB 18030>.
+
+=item Jungshik Shin's Hangul FAQ
+
+L<http://jshin.net/faq>
+
+And especially its subject 8.
+
+L<http://jshin.net/faq/qa8.html>
+
+A comprehensive overview of the Korean (C<KS *>) standards.
+
+=item debian.org: "Introduction to i18n"
+
+A brief description for most of the mentioned CJK encodings is
+contained in
+L<http://www.debian.org/doc/manuals/intro-i18n/ch-codes.en.html>
+
+=back
+
+=head2 Offline sources
+
+=over 2
+
+=item C<CJKV Information Processing> by Ken Lunde
+
+CJKV Information Processing
+1999 O'Reilly & Associates, ISBN : 1-56592-224-7
+
+The modern successor of C<CJK.inf>.
+
+Features a comprehensive coverage of CJKV character sets and
+encodings along with many other issues faced by anyone trying
+to better support CJKV languages/scripts in all the areas of
+information processing.
+
+To purchase this book, visit
+L<http://oreilly.com/catalog/9780596514471/>
+or your favourite bookstore.
+
+=back
+
+=cut
--- a/database/perl/lib/Encode/Symbol.pm
+++ b/database/perl/lib/Encode/Symbol.pm
@@ -0,0 +1,44 @@
+package Encode::Symbol;
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Symbol - Symbol Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $symbol  = encode("symbol", $utf8); # loads Encode::Symbol implicitly
+    $utf8 = decode("", $symbol);        # ditto
+
+=head1 ABSTRACT
+
+This module implements symbol and dingbats encodings.  Encodings
+supported are as follows.   
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  symbol
+  dingbats
+  AdobeZDingbat
+  AdobeSymbol
+  MacDingbats
+
+=head1 DESCRIPTION
+
+To find out how to use this module in detail, see L<Encode>.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/TW.pm
+++ b/database/perl/lib/Encode/TW.pm
@@ -0,0 +1,75 @@
+package Encode::TW;
+BEGIN {
+    if ( ord("A") == 193 ) {
+        die "Encode::TW not supported on EBCDIC\n";
+    }
+}
+use strict;
+use warnings;
+use Encode;
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::TW - Taiwan-based Chinese Encodings
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $big5 = encode("big5", $utf8); # loads Encode::TW implicitly
+    $utf8 = decode("big5", $big5); # ditto
+
+=head1 DESCRIPTION
+
+This module implements tradition Chinese charset encodings as used
+in Taiwan and Hong Kong.
+Encodings supported are as follows.
+
+  Canonical   Alias		Description
+  --------------------------------------------------------------------
+  big5-eten   /\bbig-?5$/i	Big5 encoding (with ETen extensions)
+          /\bbig5-?et(en)?$/i
+          /\btca-?big5$/i
+  big5-hkscs  /\bbig5-?hk(scs)?$/i
+              /\bhk(scs)?-?big5$/i
+                                Big5 + Cantonese characters in Hong Kong
+  MacChineseTrad		Big5 + Apple Vendor Mappings
+  cp950		                Code Page 950 
+                                = Big5 + Microsoft vendor mappings
+  --------------------------------------------------------------------
+
+To find out how to use this module in detail, see L<Encode>.
+
+=head1 NOTES
+
+Due to size concerns, C<EUC-TW> (Extended Unix Character), C<CCCII>
+(Chinese Character Code for Information Interchange), C<BIG5PLUS>
+(CMEX's Big5+) and C<BIG5EXT> (CMEX's Big5e) are distributed separately
+on CPAN, under the name L<Encode::HanExtra>. That module also contains
+extra China-based encodings.
+
+=head1 BUGS
+
+Since the original C<big5> encoding (1984) is not supported anywhere
+(glibc and DOS-based systems uses C<big5> to mean C<big5-eten>; Microsoft
+uses C<big5> to mean C<cp950>), a conscious decision was made to alias
+C<big5> to C<big5-eten>, which is the de facto superset of the original
+big5.
+
+The C<CNS11643> encoding files are not complete. For common C<CNS11643>
+manipulation, please use C<EUC-TW> in L<Encode::HanExtra>, which contains
+planes 1-7.
+
+The ASCII region (0x00-0x7f) is preserved for all encodings, even
+though this conflicts with mappings by the Unicode Consortium.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/Unicode.pm
+++ b/database/perl/lib/Encode/Unicode.pm
@@ -0,0 +1,272 @@
+package Encode::Unicode;
+
+use strict;
+use warnings;
+
+our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use XSLoader;
+XSLoader::load( __PACKAGE__, $VERSION );
+
+#
+# Object Generator 8 transcoders all at once!
+#
+
+use Encode ();
+
+our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);
+
+for my $name (
+    qw(UTF-16 UTF-16BE UTF-16LE
+    UTF-32 UTF-32BE UTF-32LE
+    UCS-2BE  UCS-2LE)
+  )
+{
+    my ( $size, $endian, $ucs2, $mask );
+    $name =~ /^(\w+)-(\d+)(\w*)$/o;
+    if ( $ucs2 = ( $1 eq 'UCS' ) ) {
+        $size = 2;
+    }
+    else {
+        $size = $2 / 8;
+    }
+    $endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : '';
+    $size == 4 and $endian = uc($endian);
+
+    my $obj = bless {
+        Name   => $name,
+        size   => $size,
+        endian => $endian,
+        ucs2   => $ucs2,
+    } => __PACKAGE__;
+    Encode::define_encoding($obj, $name);
+}
+
+use parent qw(Encode::Encoding);
+
+sub renew {
+    my $self = shift;
+    $BOM_Unknown{ $self->name } or return $self;
+    my $clone = bless {%$self} => ref($self);
+    $clone->{renewed}++;    # so the caller knows it is renewed.
+    return $clone;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Unicode -- Various Unicode Transformation Formats
+
+=cut
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/;
+    $ucs2 = encode("UCS-2BE", $utf8);
+    $utf8 = decode("UCS-2BE", $ucs2);
+
+=head1 ABSTRACT
+
+This module implements all Character Encoding Schemes of Unicode that
+are officially documented by Unicode Consortium (except, of course,
+for UTF-8, which is a native format in perl).
+
+=over 4
+
+=item L<http://www.unicode.org/glossary/> says:
+
+I<Character Encoding Scheme> A character encoding form plus byte
+serialization. There are Seven character encoding schemes in Unicode:
+UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and
+UTF-32LE (UCS-4LE), and UTF-7.
+
+Since UTF-7 is a 7-bit (re)encoded version of UTF-16BE, It is not part of
+Unicode's Character Encoding Scheme.  It is separately implemented in
+Encode::Unicode::UTF7.  For details see L<Encode::Unicode::UTF7>.
+
+=item Quick Reference
+
+                Decodes from ord(N)           Encodes chr(N) to...
+       octet/char BOM S.P d800-dfff  ord > 0xffff     \x{1abcd} ==
+  ---------------+-----------------+------------------------------
+  UCS-2BE       2   N   N  is bogus                  Not Available
+  UCS-2LE       2   N   N     bogus                  Not Available
+  UTF-16      2/4   Y   Y  is   S.P           S.P            BE/LE
+  UTF-16BE    2/4   N   Y       S.P           S.P    0xd82a,0xdfcd
+  UTF-16LE    2/4   N   Y       S.P           S.P    0x2ad8,0xcddf
+  UTF-32        4   Y   -  is bogus         As is            BE/LE
+  UTF-32BE      4   N   -     bogus         As is       0x0001abcd
+  UTF-32LE      4   N   -     bogus         As is       0xcdab0100
+  UTF-8       1-4   -   -     bogus   >= 4 octets   \xf0\x9a\af\8d
+  ---------------+-----------------+------------------------------
+
+=back
+
+=head1 Size, Endianness, and BOM
+
+You can categorize these CES by 3 criteria:  size of each character,
+endianness, and Byte Order Mark.
+
+=head2 by size
+
+UCS-2 is a fixed-length encoding with each character taking 16 bits.
+It B<does not> support I<surrogate pairs>.  When a surrogate pair
+is encountered during decode(), its place is filled with \x{FFFD}
+if I<CHECK> is 0, or the routine croaks if I<CHECK> is 1.  When a
+character whose ord value is larger than 0xFFFF is encountered,
+its place is filled with \x{FFFD} if I<CHECK> is 0, or the routine
+croaks if I<CHECK> is 1.
+
+UTF-16 is almost the same as UCS-2 but it supports I<surrogate pairs>.
+When it encounters a high surrogate (0xD800-0xDBFF), it fetches the
+following low surrogate (0xDC00-0xDFFF) and C<desurrogate>s them to
+form a character.  Bogus surrogates result in death.  When \x{10000}
+or above is encountered during encode(), it C<ensurrogate>s them and
+pushes the surrogate pair to the output stream.
+
+UTF-32 (UCS-4) is a fixed-length encoding with each character taking 32 bits.
+Since it is 32-bit, there is no need for I<surrogate pairs>.
+
+=head2 by endianness
+
+The first (and now failed) goal of Unicode was to map all character
+repertoires into a fixed-length integer so that programmers are happy.
+Since each character is either a I<short> or I<long> in C, you have to
+pay attention to the endianness of each platform when you pass data
+to one another.
+
+Anything marked as BE is Big Endian (or network byte order) and LE is
+Little Endian (aka VAX byte order).  For anything not marked either
+BE or LE, a character called Byte Order Mark (BOM) indicating the
+endianness is prepended to the string.
+
+CAVEAT: Though BOM in utf8 (\xEF\xBB\xBF) is valid, it is meaningless
+and as of this writing Encode suite just leave it as is (\x{FeFF}).
+
+=over 4
+
+=item BOM as integer when fetched in network byte order
+
+              16         32 bits/char
+  -------------------------
+  BE      0xFeFF 0x0000FeFF
+  LE      0xFFFe 0xFFFe0000
+  -------------------------
+
+=back
+
+This modules handles the BOM as follows.
+
+=over 4
+
+=item *
+
+When BE or LE is explicitly stated as the name of encoding, BOM is
+simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE).
+
+=item *
+
+When BE or LE is omitted during decode(), it checks if BOM is at the
+beginning of the string; if one is found, the endianness is set to
+what the BOM says.  
+
+=item *
+
+Default Byte Order
+
+When no BOM is found, Encode 2.76 and blow croaked.  Since Encode
+2.77, it falls back to BE accordingly to RFC2781 and the Unicode
+Standard version 8.0
+
+=item *
+
+When BE or LE is omitted during encode(), it returns a BE-encoded
+string with BOM prepended.  So when you want to encode a whole text
+file, make sure you encode() the whole text at once, not line by line
+or each line, not file, will have a BOM prepended.
+
+=item *
+
+C<UCS-2> is an exception.  Unlike others, this is an alias of UCS-2BE.
+UCS-2 is already registered by IANA and others that way.
+
+=back
+
+=head1 Surrogate Pairs
+
+To say the least, surrogate pairs were the biggest mistake of the
+Unicode Consortium.  But according to the late Douglas Adams in I<The
+Hitchhiker's Guide to the Galaxy> Trilogy, C<In the beginning the
+Universe was created. This has made a lot of people very angry and
+been widely regarded as a bad move>.  Their mistake was not of this
+magnitude so let's forgive them.
+
+(I don't dare make any comparison with Unicode Consortium and the
+Vogons here ;)  Or, comparing Encode to Babel Fish is completely
+appropriate -- if you can only stick this into your ear :)
+
+Surrogate pairs were born when the Unicode Consortium finally
+admitted that 16 bits were not big enough to hold all the world's
+character repertoires.  But they already made UCS-2 16-bit.  What
+do we do?
+
+Back then, the range 0xD800-0xDFFF was not allocated.  Let's split
+that range in half and use the first half to represent the C<upper
+half of a character> and the second half to represent the C<lower
+half of a character>.  That way, you can represent 1024 * 1024 =
+1048576 more characters.  Now we can store character ranges up to
+\x{10ffff} even with 16-bit encodings.  This pair of half-character is
+now called a I<surrogate pair> and UTF-16 is the name of the encoding
+that embraces them.
+
+Here is a formula to ensurrogate a Unicode character \x{10000} and
+above;
+
+  $hi = ($uni - 0x10000) / 0x400 + 0xD800;
+  $lo = ($uni - 0x10000) % 0x400 + 0xDC00;
+
+And to desurrogate;
+
+ $uni = 0x10000 + ($hi - 0xD800) * 0x400 + ($lo - 0xDC00);
+
+Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but
+perl does not prohibit the use of characters within this range.  To perl,
+every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
+
+  (*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
+  integer support!
+
+=head1 Error Checking
+
+Unlike most encodings which accept various ways to handle errors,
+Unicode encodings simply croaks.
+
+  % perl -MEncode -e'$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \
+         -e'Encode::from_to($_, "utf16","shift_jis", 0); print'
+  UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184.
+  % perl -MEncode -e'$a = "BOM missing"' \
+         -e' Encode::from_to($a, "utf16", "shift_jis", 0); print'
+  UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184.
+
+Unlike other encodings where mappings are not one-to-one against
+Unicode, UTFs are supposed to map 100% against one another.  So Encode
+is more strict on UTFs.
+
+Consider that "division by zero" of Encode :)
+
+=head1 SEE ALSO
+
+L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
+L<http://www.unicode.org/unicode/faq/utf_bom.html>,
+
+RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>,
+
+The whole Unicode standard L<http://www.unicode.org/unicode/uni2book/u2.html>
+
+Ch. 15, pp. 403 of C<Programming Perl (3rd Edition)>
+by Larry Wall, Tom Christiansen, Jon Orwant;
+O'Reilly & Associates; ISBN 0-596-00027-8
+
+=cut
--- a/database/perl/lib/Encode/Unicode/UTF7.pm
+++ b/database/perl/lib/Encode/Unicode/UTF7.pm
@@ -0,0 +1,133 @@
+#
+# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
+#
+package Encode::Unicode::UTF7;
+use strict;
+use warnings;
+use parent qw(Encode::Encoding);
+__PACKAGE__->Define('UTF-7');
+our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use MIME::Base64;
+use Encode qw(find_encoding);
+
+#
+# Algorithms taken from Unicode::String by Gisle Aas
+#
+
+our $OPTIONAL_DIRECT_CHARS = 1;
+my $specials = quotemeta "\'(),-./:?";
+$OPTIONAL_DIRECT_CHARS
+  and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
+
+# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
+# We use qr/[\n\r\t\ ] instead
+my $re_asis    = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
+my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
+my $e_utf16    = find_encoding("UTF-16BE");
+
+sub needs_lines { 1 }
+
+sub encode($$;$) {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+    my $len = length($str);
+    pos($str) = 0;
+    my $bytes = substr($str, 0, 0); # to propagate taintedness
+    while ( pos($str) < $len ) {
+        if ( $str =~ /\G($re_asis+)/ogc ) {
+	    my $octets = $1;
+	    utf8::downgrade($octets);
+	    $bytes .= $octets;
+        }
+        elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
+            if ( $1 eq "+" ) {
+                $bytes .= "+-";
+            }
+            else {
+                my $s = $1;
+                my $base64 = encode_base64( $e_utf16->encode($s), '' );
+                $base64 =~ s/=+$//;
+                $bytes .= "+$base64-";
+            }
+        }
+        else {
+            die "This should not happen! (pos=" . pos($str) . ")";
+        }
+    }
+    $_[1] = '' if $chk;
+    return $bytes;
+}
+
+sub decode($$;$) {
+    use re 'taint';
+    my ( $obj, $bytes, $chk ) = @_;
+    return undef unless defined $bytes;
+    my $len = length($bytes);
+    my $str = substr($bytes, 0, 0); # to propagate taintedness;
+    pos($bytes) = 0;
+    no warnings 'uninitialized';
+    while ( pos($bytes) < $len ) {
+        if ( $bytes =~ /\G([^+]+)/ogc ) {
+            $str .= $1;
+        }
+        elsif ( $bytes =~ /\G\+-/ogc ) {
+            $str .= "+";
+        }
+        elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
+            my $base64 = $1;
+            my $pad    = length($base64) % 4;
+            $base64 .= "=" x ( 4 - $pad ) if $pad;
+            $str .= $e_utf16->decode( decode_base64($base64) );
+        }
+        elsif ( $bytes =~ /\G\+/ogc ) {
+            $^W and warn "Bad UTF7 data escape";
+            $str .= "+";
+        }
+        else {
+            die "This should not happen " . pos($bytes);
+        }
+    }
+    $_[1] = '' if $chk;
+    return $str;
+}
+1;
+__END__
+
+=head1 NAME
+
+Encode::Unicode::UTF7 -- UTF-7 encoding
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $utf7 = encode("UTF-7", $utf8);
+    $utf8 = decode("UTF-7", $ucs2);
+
+=head1 ABSTRACT
+
+This module implements UTF-7 encoding documented in RFC 2152.  UTF-7,
+as its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
+is designed to be MTA-safe and expected to be a standard way to
+exchange Unicoded mails via mails.  But with the advent of UTF-8 and
+8-bit compliant MTAs, UTF-7 is hardly ever used.
+
+UTF-7 was not supported by Encode until version 1.95 because of that.
+But Unicode::String, a module by Gisle Aas which adds Unicode supports
+to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
+so Encode can supersede Unicode::String 100%.
+
+=head1 In Practice
+
+When you want to encode Unicode for mails and web pages, however, do
+not use UTF-7 unless you are sure your recipients and readers can
+handle it.  Very few MUAs and WWW Browsers support these days (only
+Mozilla seems to support one).  For general cases, use UTF-8 for
+message body and MIME-Header for header instead.
+
+=head1 SEE ALSO
+
+L<Encode>, L<Encode::Unicode>, L<Unicode::String>
+
+RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
+
+=cut
--- a/database/perl/lib/Encode/_PM.e2x
+++ b/database/perl/lib/Encode/_PM.e2x
@@ -0,0 +1,23 @@
+package Encode::$_Name_;
+our $VERSION = "0.01";
+ 
+use Encode;
+use XSLoader;
+XSLoader::load(__PACKAGE__,$VERSION);
+
+1;
+__END__
+
+=head1 NAME
+ 
+Encode::$_Name_ - New Encoding
+ 
+=head1 SYNOPSIS
+
+You got to fill this in!
+
+=head1 SEE ALSO
+
+L<Encode>
+
+=cut
--- a/database/perl/lib/Encode/_T.e2x
+++ b/database/perl/lib/Encode/_T.e2x
@@ -0,0 +1,9 @@
+use strict;
+# Adjust the number here!
+use Test::More tests => 2;
+
+BEGIN {
+    use_ok('Encode');
+    use_ok('Encode::$_Name_');
+}
+# Add more test here!
--- a/database/perl/lib/Encode/encode.h
+++ b/database/perl/lib/Encode/encode.h