Initial Commit
This commit is contained in:
373
database/perl/vendor/lib/Encode/Locale.pm
vendored
Normal file
373
database/perl/vendor/lib/Encode/Locale.pm
vendored
Normal file
@@ -0,0 +1,373 @@
|
||||
package Encode::Locale;
|
||||
|
||||
use strict;
|
||||
our $VERSION = "1.05";
|
||||
|
||||
use base 'Exporter';
|
||||
our @EXPORT_OK = qw(
|
||||
decode_argv env
|
||||
$ENCODING_LOCALE $ENCODING_LOCALE_FS
|
||||
$ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT
|
||||
);
|
||||
|
||||
use Encode ();
|
||||
use Encode::Alias ();
|
||||
|
||||
our $ENCODING_LOCALE;
|
||||
our $ENCODING_LOCALE_FS;
|
||||
our $ENCODING_CONSOLE_IN;
|
||||
our $ENCODING_CONSOLE_OUT;
|
||||
|
||||
sub DEBUG () { 0 }
|
||||
|
||||
sub _init {
|
||||
if ($^O eq "MSWin32") {
|
||||
unless ($ENCODING_LOCALE) {
|
||||
# Try to obtain what the Windows ANSI code page is
|
||||
eval {
|
||||
unless (defined &GetACP) {
|
||||
require Win32;
|
||||
eval { Win32::GetACP() };
|
||||
*GetACP = sub { &Win32::GetACP } unless $@;
|
||||
}
|
||||
unless (defined &GetACP) {
|
||||
require Win32::API;
|
||||
Win32::API->Import('kernel32', 'int GetACP()');
|
||||
}
|
||||
if (defined &GetACP) {
|
||||
my $cp = GetACP();
|
||||
$ENCODING_LOCALE = "cp$cp" if $cp;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
unless ($ENCODING_CONSOLE_IN) {
|
||||
# only test one since set together
|
||||
unless (defined &GetInputCP) {
|
||||
eval {
|
||||
require Win32;
|
||||
eval { Win32::GetConsoleCP() };
|
||||
# manually "import" it since Win32->import refuses
|
||||
*GetInputCP = sub { &Win32::GetConsoleCP } unless $@;
|
||||
*GetOutputCP = sub { &Win32::GetConsoleOutputCP } unless $@;
|
||||
};
|
||||
unless (defined &GetInputCP) {
|
||||
eval {
|
||||
# try Win32::Console module for codepage to use
|
||||
require Win32::Console;
|
||||
eval { Win32::Console::InputCP() };
|
||||
*GetInputCP = sub { &Win32::Console::InputCP }
|
||||
unless $@;
|
||||
*GetOutputCP = sub { &Win32::Console::OutputCP }
|
||||
unless $@;
|
||||
};
|
||||
}
|
||||
unless (defined &GetInputCP) {
|
||||
# final fallback
|
||||
*GetInputCP = *GetOutputCP = sub {
|
||||
# another fallback that could work is:
|
||||
# reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP
|
||||
((qx(chcp) || '') =~ /^Active code page: (\d+)/)
|
||||
? $1 : ();
|
||||
};
|
||||
}
|
||||
}
|
||||
my $cp = GetInputCP();
|
||||
$ENCODING_CONSOLE_IN = "cp$cp" if $cp;
|
||||
$cp = GetOutputCP();
|
||||
$ENCODING_CONSOLE_OUT = "cp$cp" if $cp;
|
||||
}
|
||||
}
|
||||
|
||||
unless ($ENCODING_LOCALE) {
|
||||
eval {
|
||||
require I18N::Langinfo;
|
||||
$ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
|
||||
|
||||
# Workaround of Encode < v2.25. The "646" encoding alias was
|
||||
# introduced in Encode-2.25, but we don't want to require that version
|
||||
# quite yet. Should avoid the CPAN testers failure reported from
|
||||
# openbsd-4.7/perl-5.10.0 combo.
|
||||
$ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646";
|
||||
|
||||
# https://rt.cpan.org/Ticket/Display.html?id=66373
|
||||
$ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8";
|
||||
};
|
||||
$ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN;
|
||||
}
|
||||
|
||||
if ($^O eq "darwin") {
|
||||
$ENCODING_LOCALE_FS ||= "UTF-8";
|
||||
}
|
||||
|
||||
# final fallback
|
||||
$ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8";
|
||||
$ENCODING_LOCALE_FS ||= $ENCODING_LOCALE;
|
||||
$ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE;
|
||||
$ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN;
|
||||
|
||||
unless (Encode::find_encoding($ENCODING_LOCALE)) {
|
||||
my $foundit;
|
||||
if (lc($ENCODING_LOCALE) eq "gb18030") {
|
||||
eval {
|
||||
require Encode::HanExtra;
|
||||
};
|
||||
if ($@) {
|
||||
die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped";
|
||||
}
|
||||
$foundit++ if Encode::find_encoding($ENCODING_LOCALE);
|
||||
}
|
||||
die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped"
|
||||
unless $foundit;
|
||||
|
||||
}
|
||||
|
||||
# use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT;
|
||||
}
|
||||
|
||||
_init();
|
||||
Encode::Alias::define_alias(sub {
|
||||
no strict 'refs';
|
||||
no warnings 'once';
|
||||
return ${"ENCODING_" . uc(shift)};
|
||||
}, "locale");
|
||||
|
||||
sub _flush_aliases {
|
||||
no strict 'refs';
|
||||
for my $a (keys %Encode::Alias::Alias) {
|
||||
if (defined ${"ENCODING_" . uc($a)}) {
|
||||
delete $Encode::Alias::Alias{$a};
|
||||
warn "Flushed alias cache for $a" if DEBUG;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub reinit {
|
||||
$ENCODING_LOCALE = shift;
|
||||
$ENCODING_LOCALE_FS = shift;
|
||||
$ENCODING_CONSOLE_IN = $ENCODING_LOCALE;
|
||||
$ENCODING_CONSOLE_OUT = $ENCODING_LOCALE;
|
||||
_init();
|
||||
_flush_aliases();
|
||||
}
|
||||
|
||||
sub decode_argv {
|
||||
die if defined wantarray;
|
||||
for (@ARGV) {
|
||||
$_ = Encode::decode(locale => $_, @_);
|
||||
}
|
||||
}
|
||||
|
||||
sub env {
|
||||
my $k = Encode::encode(locale => shift);
|
||||
my $old = $ENV{$k};
|
||||
if (@_) {
|
||||
my $v = shift;
|
||||
if (defined $v) {
|
||||
$ENV{$k} = Encode::encode(locale => $v);
|
||||
}
|
||||
else {
|
||||
delete $ENV{$k};
|
||||
}
|
||||
}
|
||||
return Encode::decode(locale => $old) if defined wantarray;
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Encode::Locale - Determine the locale encoding
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Encode::Locale;
|
||||
use Encode;
|
||||
|
||||
$string = decode(locale => $bytes);
|
||||
$bytes = encode(locale => $string);
|
||||
|
||||
if (-t) {
|
||||
binmode(STDIN, ":encoding(console_in)");
|
||||
binmode(STDOUT, ":encoding(console_out)");
|
||||
binmode(STDERR, ":encoding(console_out)");
|
||||
}
|
||||
|
||||
# Processing file names passed in as arguments
|
||||
my $uni_filename = decode(locale => $ARGV[0]);
|
||||
open(my $fh, "<", encode(locale_fs => $uni_filename))
|
||||
|| die "Can't open '$uni_filename': $!";
|
||||
binmode($fh, ":encoding(locale)");
|
||||
...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
In many applications it's wise to let Perl use Unicode for the strings it
|
||||
processes. Most of the interfaces Perl has to the outside world are still byte
|
||||
based. Programs therefore need to decode byte strings that enter the program
|
||||
from the outside and encode them again on the way out.
|
||||
|
||||
The POSIX locale system is used to specify both the language conventions
|
||||
requested by the user and the preferred character set to consume and
|
||||
output. The C<Encode::Locale> module looks up the charset and encoding (called
|
||||
a CODESET in the locale jargon) and arranges for the L<Encode> module to know
|
||||
this encoding under the name "locale". It means bytes obtained from the
|
||||
environment can be converted to Unicode strings by calling C<<
|
||||
Encode::encode(locale => $bytes) >> and converted back again with C<<
|
||||
Encode::decode(locale => $string) >>.
|
||||
|
||||
Where file systems interfaces pass file names in and out of the program we also
|
||||
need care. The trend is for operating systems to use a fixed file encoding
|
||||
that don't actually depend on the locale; and this module determines the most
|
||||
appropriate encoding for file names. The L<Encode> module will know this
|
||||
encoding under the name "locale_fs". For traditional Unix systems this will
|
||||
be an alias to the same encoding as "locale".
|
||||
|
||||
For programs running in a terminal window (called a "Console" on some systems)
|
||||
the "locale" encoding is usually a good choice for what to expect as input and
|
||||
output. Some systems allows us to query the encoding set for the terminal and
|
||||
C<Encode::Locale> will do that if available and make these encodings known
|
||||
under the C<Encode> aliases "console_in" and "console_out". For systems where
|
||||
we can't determine the terminal encoding these will be aliased as the same
|
||||
encoding as "locale". The advice is to use "console_in" for input known to
|
||||
come from the terminal and "console_out" for output to the terminal.
|
||||
|
||||
In addition to arranging for various Encode aliases the following functions and
|
||||
variables are provided:
|
||||
|
||||
=over
|
||||
|
||||
=item decode_argv( )
|
||||
|
||||
=item decode_argv( Encode::FB_CROAK )
|
||||
|
||||
This will decode the command line arguments to perl (the C<@ARGV> array) in-place.
|
||||
|
||||
The function will by default replace characters that can't be decoded by
|
||||
"\x{FFFD}", the Unicode replacement character.
|
||||
|
||||
Any argument provided is passed as CHECK to underlying Encode::decode() call.
|
||||
Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the
|
||||
command line arguments can be decoded. See L<Encode/"Handling Malformed Data">
|
||||
for details on other options for CHECK.
|
||||
|
||||
=item env( $uni_key )
|
||||
|
||||
=item env( $uni_key => $uni_value )
|
||||
|
||||
Interface to get/set environment variables. Returns the current value as a
|
||||
Unicode string. The $uni_key and $uni_value arguments are expected to be
|
||||
Unicode strings as well. Passing C<undef> as $uni_value deletes the
|
||||
environment variable named $uni_key.
|
||||
|
||||
The returned value will have the characters that can't be decoded replaced by
|
||||
"\x{FFFD}", the Unicode replacement character.
|
||||
|
||||
There is no interface to request alternative CHECK behavior as for
|
||||
decode_argv(). If you need that you need to call encode/decode yourself.
|
||||
For example:
|
||||
|
||||
my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK);
|
||||
my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK);
|
||||
|
||||
=item reinit( )
|
||||
|
||||
=item reinit( $encoding )
|
||||
|
||||
Reinitialize the encodings from the locale. You want to call this function if
|
||||
you changed anything in the environment that might influence the locale.
|
||||
|
||||
This function will croak if the determined encoding isn't recognized by
|
||||
the Encode module.
|
||||
|
||||
With argument force $ENCODING_... variables to set to the given value.
|
||||
|
||||
=item $ENCODING_LOCALE
|
||||
|
||||
The encoding name determined to be suitable for the current locale.
|
||||
L<Encode> know this encoding as "locale".
|
||||
|
||||
=item $ENCODING_LOCALE_FS
|
||||
|
||||
The encoding name determined to be suitable for file system interfaces
|
||||
involving file names.
|
||||
L<Encode> know this encoding as "locale_fs".
|
||||
|
||||
=item $ENCODING_CONSOLE_IN
|
||||
|
||||
=item $ENCODING_CONSOLE_OUT
|
||||
|
||||
The encodings to be used for reading and writing output to the a console.
|
||||
L<Encode> know these encodings as "console_in" and "console_out".
|
||||
|
||||
=back
|
||||
|
||||
=head1 NOTES
|
||||
|
||||
This table summarizes the mapping of the encodings set up
|
||||
by the C<Encode::Locale> module:
|
||||
|
||||
Encode | | |
|
||||
Alias | Windows | Mac OS X | POSIX
|
||||
------------+---------+--------------+------------
|
||||
locale | ANSI | nl_langinfo | nl_langinfo
|
||||
locale_fs | ANSI | UTF-8 | nl_langinfo
|
||||
console_in | OEM | nl_langinfo | nl_langinfo
|
||||
console_out | OEM | nl_langinfo | nl_langinfo
|
||||
|
||||
=head2 Windows
|
||||
|
||||
Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16
|
||||
strings) and a byte based API based a character set called ANSI. The
|
||||
regular Perl interfaces to the OS currently only uses the ANSI APIs.
|
||||
Unfortunately ANSI is not a single character set.
|
||||
|
||||
The encoding that corresponds to ANSI varies between different editions of
|
||||
Windows. For many western editions of Windows ANSI corresponds to CP-1252
|
||||
which is a character set similar to ISO-8859-1. Conceptually the ANSI
|
||||
character set is a similar concept to the POSIX locale CODESET so this module
|
||||
figures out what the ANSI code page is and make this available as
|
||||
$ENCODING_LOCALE and the "locale" Encoding alias.
|
||||
|
||||
Windows systems also operate with another byte based character set.
|
||||
It's called the OEM code page. This is the encoding that the Console
|
||||
takes as input and output. It's common for the OEM code page to
|
||||
differ from the ANSI code page.
|
||||
|
||||
=head2 Mac OS X
|
||||
|
||||
On Mac OS X the file system encoding is always UTF-8 while the locale
|
||||
can otherwise be set up as normal for POSIX systems.
|
||||
|
||||
File names on Mac OS X will at the OS-level be converted to
|
||||
NFD-form. A file created by passing a NFC-filename will come
|
||||
in NFD-form from readdir(). See L<Unicode::Normalize> for details
|
||||
of NFD/NFC.
|
||||
|
||||
Actually, Apple does not follow the Unicode NFD standard since not all
|
||||
character ranges are decomposed. The claim is that this avoids problems with
|
||||
round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for
|
||||
details.
|
||||
|
||||
=head2 POSIX (Linux and other Unixes)
|
||||
|
||||
File systems might vary in what encoding is to be used for
|
||||
filenames. Since this module has no way to actually figure out
|
||||
what the is correct it goes with the best guess which is to
|
||||
assume filenames are encoding according to the current locale.
|
||||
Users are advised to always specify UTF-8 as the locale charset.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<I18N::Langinfo>, L<Encode>, L<Term::Encoding>
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright 2010 Gisle Aas <gisle@aas.no>.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
=cut
|
||||
87
database/perl/vendor/lib/Encode/compat.pm
vendored
Normal file
87
database/perl/vendor/lib/Encode/compat.pm
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
# $File: //member/autrijus/Encode-compat/lib/Encode/compat.pm $ $Author: autrijus $
|
||||
# $Revision: #7 $ $Change: 10735 $ $DateTime: 2004/06/03 14:08:57 $
|
||||
|
||||
package Encode::compat;
|
||||
$Encode::compat::VERSION = '0.07';
|
||||
|
||||
use strict;
|
||||
|
||||
if ($] >= 5.007001 or $INC{'Encode.pm'}) {
|
||||
# nothing happens -- Encode.pm already available.
|
||||
}
|
||||
elsif ($] >= 5.006001 and $] <= 5.007) {
|
||||
require Encode::compat::Alias;
|
||||
$INC{'Encode/Alias.pm'} = $INC{'Encode/compat/Alias.pm'};
|
||||
|
||||
require Encode::compat::common;
|
||||
require Encode::compat::5006001;
|
||||
$INC{'Encode.pm'} = __FILE__;
|
||||
}
|
||||
else {
|
||||
die "Encode.pm compatibility layer for $] not yet available.";
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Encode::compat - Encode.pm emulation layer
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
This document describes version 0.07 of Encode::compat, released
|
||||
June 3, 2004.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Encode::compat; # a no-op for Perl v5.7.1+
|
||||
use Encode qw(...); # all constants and imports works transparently
|
||||
|
||||
# use Encode functions as normal
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
WARNING: THIS IS A PROOF-OF-CONCEPT. Most functions are incomplete.
|
||||
All implementation details are subject to change!
|
||||
|
||||
This module provide a compatibility layer for B<Encode.pm> users on perl
|
||||
versions earlier than v5.7.1. It translates whatever call it receives
|
||||
into B<Text::Iconv>, or (in the future) B<Unicode::MapUTF8> to perform
|
||||
the actual work.
|
||||
|
||||
The C<is_utf8()>, C<_utf8_on()> and C<_utf8_off()> calls are performed
|
||||
by the method native to the perl version -- 5.6.1 would use
|
||||
C<pack>/C<unpack>, 5.6.0 uses C<tr//CU>, etc.
|
||||
|
||||
Theoretically, it could be backported to 5.005 and earlier, with none of
|
||||
the unicode-related semantics available, and serves only as a
|
||||
abstraction layer above C<Text::Iconv>, C<Unicode::MapUTF8> and possibly
|
||||
other transcoding modules.
|
||||
|
||||
=head1 CAVEATS
|
||||
|
||||
Currently, this module only support 5.6.1+, and merely provides the three
|
||||
utility function above (C<encode()>, C<decode()> and C<from_to()>), with
|
||||
a very kludgy C<FB_HTMLCREF> fallback against C<latin-1> in
|
||||
C<from_to()>.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Encode>, L<perlunicode>
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright 2002, 2003, 2004 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself.
|
||||
|
||||
See L<http://www.perl.com/perl/misc/Artistic.html>
|
||||
|
||||
=cut
|
||||
32
database/perl/vendor/lib/Encode/compat/5006001.pm
vendored
Normal file
32
database/perl/vendor/lib/Encode/compat/5006001.pm
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# $File: //member/autrijus/Encode-compat/lib/Encode/compat/5006001.pm $ $Author: autrijus $
|
||||
# $Revision: #3 $ $Change: 2534 $ $DateTime: 2002/12/02 00:33:16 $
|
||||
|
||||
package Encode::compat::5006001;
|
||||
our $VERSION = '0.05';
|
||||
|
||||
1;
|
||||
|
||||
package Encode;
|
||||
|
||||
use strict;
|
||||
use base 'Exporter';
|
||||
no warnings 'redefine';
|
||||
|
||||
sub _utf8_on {
|
||||
$_[0] = pack('U*', unpack('U0U*', $_[0]))
|
||||
}
|
||||
|
||||
sub _utf8_off {
|
||||
$_[0] = pack('C*', unpack('C*', $_[0]))
|
||||
}
|
||||
|
||||
sub is_utf8 {
|
||||
# XXX: got any better ideas?
|
||||
use utf8;
|
||||
foreach my $char (split(//, $_[0])) {
|
||||
return 1 if ord($char) > 255;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
1;
|
||||
330
database/perl/vendor/lib/Encode/compat/Alias.pm
vendored
Normal file
330
database/perl/vendor/lib/Encode/compat/Alias.pm
vendored
Normal file
@@ -0,0 +1,330 @@
|
||||
# $File: //member/autrijus/.vimrc $ $Author: autrijus $
|
||||
# $Revision: #1 $ $Change: 1649 $ $DateTime: 2002/10/24 15:21:23 $
|
||||
|
||||
package Encode::compat::Alias;
|
||||
our $VERSION = '0.05';
|
||||
|
||||
1;
|
||||
|
||||
package Encode::Alias;
|
||||
use strict;
|
||||
our $VERSION = '0.05';
|
||||
our $DEBUG = 0;
|
||||
|
||||
use base qw(Exporter);
|
||||
|
||||
# Public, encouraged API is exported by default
|
||||
|
||||
our @EXPORT =
|
||||
qw (
|
||||
define_alias
|
||||
find_alias
|
||||
);
|
||||
|
||||
our @Alias; # ordered matching list
|
||||
our %Alias; # cached known aliases
|
||||
|
||||
sub find_alias
|
||||
{
|
||||
my $class = shift;
|
||||
local $_ = shift;
|
||||
unless (exists $Alias{$_})
|
||||
{
|
||||
$Alias{$_} = undef; # Recursion guard
|
||||
for (my $i=0; $i < @Alias; $i += 2)
|
||||
{
|
||||
my $alias = $Alias[$i];
|
||||
my $val = $Alias[$i+1];
|
||||
my $new;
|
||||
if (ref($alias) eq 'Regexp' && $_ =~ $alias)
|
||||
{
|
||||
$DEBUG and warn "eval $val";
|
||||
$new = eval $val;
|
||||
# $@ and warn "$val, $@";
|
||||
}
|
||||
elsif (ref($alias) eq 'CODE')
|
||||
{
|
||||
$DEBUG and warn "$alias", "->", "($val)";
|
||||
$new = $alias->($val);
|
||||
}
|
||||
elsif (lc($_) eq lc($alias))
|
||||
{
|
||||
$new = $val;
|
||||
}
|
||||
if (defined($new))
|
||||
{
|
||||
next if $new eq $_; # avoid (direct) recursion on bugs
|
||||
$DEBUG and warn "$alias, $new";
|
||||
my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
|
||||
if ($enc)
|
||||
{
|
||||
$Alias{$_} = $enc;
|
||||
last;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($DEBUG){
|
||||
my $name;
|
||||
if (my $e = $Alias{$_}){
|
||||
$name = $e->name;
|
||||
}else{
|
||||
$name = "";
|
||||
}
|
||||
warn "find_alias($class, $_)->name = $name";
|
||||
}
|
||||
return $Alias{$_};
|
||||
}
|
||||
|
||||
sub define_alias
|
||||
{
|
||||
while (@_)
|
||||
{
|
||||
my ($alias,$name) = splice(@_,0,2);
|
||||
unshift(@Alias, $alias => $name); # newer one has precedence
|
||||
# clear %Alias cache to allow overrides
|
||||
if (ref($alias)){
|
||||
my @a = keys %Alias;
|
||||
for my $k (@a){
|
||||
if (ref($alias) eq 'Regexp' && $k =~ $alias)
|
||||
{
|
||||
$DEBUG and warn "delete \$Alias\{$k\}";
|
||||
delete $Alias{$k};
|
||||
}
|
||||
elsif (ref($alias) eq 'CODE')
|
||||
{
|
||||
$DEBUG and warn "delete \$Alias\{$k\}";
|
||||
delete $Alias{$alias->($name)};
|
||||
}
|
||||
}
|
||||
}else{
|
||||
$DEBUG and warn "delete \$Alias\{$alias\}";
|
||||
delete $Alias{$alias};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Allow latin-1 style names as well
|
||||
# 0 1 2 3 4 5 6 7 8 9 10
|
||||
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
|
||||
# Allow winlatin1 style names as well
|
||||
our %Winlatin2cp = (
|
||||
'latin1' => 1252,
|
||||
'latin2' => 1250,
|
||||
'cyrillic' => 1251,
|
||||
'greek' => 1253,
|
||||
'turkish' => 1254,
|
||||
'hebrew' => 1255,
|
||||
'arabic' => 1256,
|
||||
'baltic' => 1257,
|
||||
'vietnamese' => 1258,
|
||||
);
|
||||
|
||||
init_aliases();
|
||||
|
||||
sub undef_aliases{
|
||||
@Alias = ();
|
||||
%Alias = ();
|
||||
}
|
||||
|
||||
sub init_aliases
|
||||
{
|
||||
undef_aliases();
|
||||
|
||||
# Try all-lower-case version should all else fails
|
||||
define_alias( qr/^(.*)$/ => '"\L$1"' );
|
||||
|
||||
# UTF/UCS stuff
|
||||
define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
|
||||
define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
|
||||
qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
|
||||
qr/^iso-10646-1$/i => '"UCS-2BE"' );
|
||||
define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"',
|
||||
qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"',
|
||||
qr/^UTF(16|32)$/i => '"UTF-$1"',
|
||||
);
|
||||
# ASCII
|
||||
define_alias(qr/^(?:US-?)ascii$/i => '"ascii"');
|
||||
define_alias('C' => 'ascii');
|
||||
define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"');
|
||||
# Allow variants of iso-8859-1 etc.
|
||||
define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
|
||||
|
||||
# At least HP-UX has these.
|
||||
define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
|
||||
|
||||
# More HP stuff.
|
||||
define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
|
||||
|
||||
# The Official name of ASCII.
|
||||
define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
|
||||
|
||||
# This is a font issue, not an encoding issue.
|
||||
# (The currency symbol of the Latin 1 upper half
|
||||
# has been redefined as the euro symbol.)
|
||||
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
|
||||
|
||||
define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
|
||||
=> 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );
|
||||
|
||||
define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
|
||||
hebrew|arabic|baltic|vietnamese)$/ix =>
|
||||
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
|
||||
|
||||
# Common names for non-latin prefered MIME names
|
||||
define_alias( 'ascii' => 'US-ascii',
|
||||
'cyrillic' => 'iso-8859-5',
|
||||
'arabic' => 'iso-8859-6',
|
||||
'greek' => 'iso-8859-7',
|
||||
'hebrew' => 'iso-8859-8',
|
||||
'thai' => 'iso-8859-11',
|
||||
'tis620' => 'iso-8859-11',
|
||||
);
|
||||
|
||||
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
|
||||
# And Microsoft has their own naming (again, surprisingly).
|
||||
# And windows-* is registered in IANA!
|
||||
define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"');
|
||||
|
||||
# Sometimes seen with a leading zero.
|
||||
# define_alias( qr/\bcp037\b/i => '"cp37"');
|
||||
|
||||
# Mac Mappings
|
||||
# predefined in *.ucm; unneeded
|
||||
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
|
||||
define_alias( qr/^mac_(.*)$/i => '"mac$1"');
|
||||
# Ououououou. gone. They are differente!
|
||||
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
|
||||
|
||||
# Standardize on the dashed versions.
|
||||
# define_alias( qr/\butf8$/i => 'utf-8' );
|
||||
define_alias( qr/\bkoi8r$/i => 'koi8-r' );
|
||||
define_alias( qr/\bkoi8u$/i => 'koi8-u' );
|
||||
|
||||
unless ($Encode::ON_EBCDIC){
|
||||
# for Encode::CN
|
||||
define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
|
||||
define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
|
||||
# define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
|
||||
# CP936 doesn't have vendor-addon for GBK, so they're identical.
|
||||
define_alias( qr/^gbk$/i => '"cp936"');
|
||||
# This fixes gb2312 vs. euc-cn confusion, practically
|
||||
define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' );
|
||||
# for Encode::JP
|
||||
define_alias( qr/\bjis$/i => '"7bit-jis"' );
|
||||
define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
|
||||
define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
|
||||
define_alias( qr/\bujis$/i => '"euc-jp"' );
|
||||
define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
|
||||
define_alias( qr/\bsjis$/i => '"shiftjis"' );
|
||||
# for Encode::KR
|
||||
define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
|
||||
define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
|
||||
# This fixes ksc5601 vs. euc-kr confusion, practically
|
||||
define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
|
||||
define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
|
||||
define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
|
||||
# for Encode::TW
|
||||
define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
|
||||
define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
|
||||
define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
|
||||
define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
|
||||
define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
|
||||
}
|
||||
# utf8 is blessed :)
|
||||
define_alias( qr/^UTF-8$/i => '"utf8"',);
|
||||
# At last, Map white space and _ to '-'
|
||||
define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
|
||||
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
|
||||
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
|
||||
# TODO: Armenian encoding ARMSCII-8
|
||||
# TODO: Hebrew encoding ISO-8859-8-1
|
||||
# TODO: Thai encoding TCVN
|
||||
# TODO: Vietnamese encodings VPS
|
||||
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
|
||||
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
|
||||
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
|
||||
# Kannada Khmer Korean Laotian Malayalam Mongolian
|
||||
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Encode::Alias - alias definitions to encodings
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Encode;
|
||||
use Encode::Alias;
|
||||
define_alias( newName => ENCODING);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Allows newName to be used as an alias for ENCODING. ENCODING may be
|
||||
either the name of an encoding or an encoding object (as described
|
||||
in L<Encode>).
|
||||
|
||||
Currently I<newName> can be specified in the following ways:
|
||||
|
||||
=over 4
|
||||
|
||||
=item As a simple string.
|
||||
|
||||
=item As a qr// compiled regular expression, e.g.:
|
||||
|
||||
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
|
||||
|
||||
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
|
||||
in order to allow C<$1> etc. to be substituted. The example is one
|
||||
way to alias names as used in X11 fonts to the MIME names for the
|
||||
iso-8859-* family. Note the double quotes inside the single quotes.
|
||||
|
||||
If you are using a regex here, you have to use the quotes as shown or
|
||||
it won't work. Also note that regex handling is tricky even for the
|
||||
experienced. Use it with caution.
|
||||
|
||||
=item As a code reference, e.g.:
|
||||
|
||||
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
|
||||
|
||||
In this case, C<$_> will be set to the name that is being looked up and
|
||||
I<ENCODING> is passed to the sub as its first argument. The example
|
||||
is another way to alias names as used in X11 fonts to the MIME names
|
||||
for the iso-8859-* family.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Alias overloading
|
||||
|
||||
You can override predefined aliases by simply applying define_alias().
|
||||
The new alias is always evaluated first, and when neccessary,
|
||||
define_alias() flushes the internal cache to make the new definition
|
||||
available.
|
||||
|
||||
# redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
|
||||
# superset of SHIFT_JIS
|
||||
|
||||
define_alias( qr/shift.*jis$/i => '"cp932"' );
|
||||
define_alias( qr/sjis$/i => '"cp932"' );
|
||||
|
||||
If you want to zap all predefined aliases, you can use
|
||||
|
||||
Encode::Alias->undef_aliases;
|
||||
|
||||
to do so. And
|
||||
|
||||
Encode::Alias->init_aliases;
|
||||
|
||||
gets the factory settings back.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Encode>, L<Encode::Supported>
|
||||
|
||||
=cut
|
||||
|
||||
127
database/perl/vendor/lib/Encode/compat/common.pm
vendored
Normal file
127
database/perl/vendor/lib/Encode/compat/common.pm
vendored
Normal file
@@ -0,0 +1,127 @@
|
||||
# $File: //member/autrijus/Encode-compat/lib/Encode/compat/common.pm $ $Author: autrijus $
|
||||
# $Revision: #7 $ $Change: 10024 $ $DateTime: 2004/02/13 21:42:35 $
|
||||
|
||||
package Encode::compat::common;
|
||||
our $VERSION = '0.06';
|
||||
|
||||
1;
|
||||
|
||||
package Encode;
|
||||
|
||||
use strict;
|
||||
our $VERSION = '0.06';
|
||||
|
||||
our @EXPORT = qw(
|
||||
decode decode_utf8 encode encode_utf8
|
||||
encodings find_encoding
|
||||
);
|
||||
|
||||
use constant DIE_ON_ERR => 1;
|
||||
use constant WARN_ON_ERR => 2;
|
||||
use constant RETURN_ON_ERR => 4;
|
||||
use constant LEAVE_SRC => 8;
|
||||
|
||||
use constant PERLQQ => 256;
|
||||
use constant HTMLCREF => 512;
|
||||
use constant XMLCREF => 1024;
|
||||
|
||||
use constant FB_DEFAULT => 0;
|
||||
use constant FB_CROAK => 1;
|
||||
use constant FB_QUIET => 4;
|
||||
use constant FB_WARN => 6;
|
||||
use constant FB_PERLQQ => 256;
|
||||
use constant FB_HTMLCREF => 512;
|
||||
use constant FB_XMLCREF => 1024;
|
||||
|
||||
our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
|
||||
PERLQQ HTMLCREF XMLCREF);
|
||||
our @FB_CONSTS = qw(FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
|
||||
FB_PERLQQ FB_HTMLCREF FB_XMLCREF);
|
||||
|
||||
our @EXPORT_OK =
|
||||
(
|
||||
qw(
|
||||
_utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
|
||||
is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
|
||||
),
|
||||
@FB_FLAGS, @FB_CONSTS,
|
||||
);
|
||||
|
||||
our %EXPORT_TAGS =
|
||||
(
|
||||
all => [ @EXPORT, @EXPORT_OK ],
|
||||
fallbacks => [ @FB_CONSTS ],
|
||||
fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
|
||||
);
|
||||
|
||||
sub from_to ($$$;$) {
|
||||
use utf8;
|
||||
|
||||
# XXX: bad hack
|
||||
if ($_[3] and $_[3] == FB_HTMLCREF() and lc($_[2]) eq 'latin1') {
|
||||
$_[0] = join('', map {
|
||||
ord($_) < 128
|
||||
? $_ : '&#' . ord($_) . ';'
|
||||
} split(//, decode($_[1], $_[0])));
|
||||
}
|
||||
else {
|
||||
$_[0] = _convert(@_[0..2]);
|
||||
}
|
||||
}
|
||||
|
||||
sub encodings {
|
||||
# XXX: revisit
|
||||
require Encode::Alias;
|
||||
return sort values %Encode::Alias::Alias;
|
||||
}
|
||||
|
||||
sub find_encoding {
|
||||
return $_[0];
|
||||
}
|
||||
|
||||
sub decode_utf8($;$) {
|
||||
return decode("utf-8", @_);
|
||||
}
|
||||
|
||||
sub encode_utf8($;$) {
|
||||
return encode("utf-8", @_);
|
||||
}
|
||||
|
||||
sub decode($$;$) {
|
||||
my $result = ($_[0] =~ /utf-?8/i)
|
||||
? $_[1] : _convert($_[1], $_[0] => 'utf-8');
|
||||
_utf8_on($result);
|
||||
return $result;
|
||||
}
|
||||
|
||||
sub encode($$;$) {
|
||||
my $result = ($_[0] =~ /utf-?8/i)
|
||||
? $_[1] : _convert($_[1], 'utf-8' => $_[0]);
|
||||
_utf8_off($result);
|
||||
return $result;
|
||||
}
|
||||
|
||||
{
|
||||
my %decoder;
|
||||
sub _convert {
|
||||
require Text::Iconv;
|
||||
Text::Iconv->raise_error(1);
|
||||
|
||||
require Encode::Alias;
|
||||
my ($from, $to) = map {
|
||||
s/^utf8$/utf-8/i;
|
||||
s/^big5-eten$/big5/i;
|
||||
$_;
|
||||
} map {
|
||||
Encode::Alias->find_alias($_) || lc($_)
|
||||
} ($_[1], $_[2]);
|
||||
|
||||
my $result = ($from eq $to) ? $_[0] : (
|
||||
$decoder{$from, $to} ||= Text::Iconv->new( $from, $to )
|
||||
)->convert($_[0]);
|
||||
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
||||
Reference in New Issue
Block a user