Initial Commit

This commit is contained in:
Riley Schneider
2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions

View File

@@ -0,0 +1,32 @@
# $File: //member/autrijus/Encode-compat/lib/Encode/compat/5006001.pm $ $Author: autrijus $
# $Revision: #3 $ $Change: 2534 $ $DateTime: 2002/12/02 00:33:16 $
package Encode::compat::5006001;
our $VERSION = '0.05';
1;
package Encode;
use strict;
use base 'Exporter';
no warnings 'redefine';
# Rewrite the caller's scalar in place (through the $_[0] alias): the value
# is unpacked with the 'U0U*' template and the resulting list is packed
# again with 'U*'.  The freshly assigned value is also the return value.
# NOTE(review): the exact effect of these templates on the UTF-8 flag
# differs between Perl releases -- presumably tuned for 5.6.1; confirm.
sub _utf8_on {
    my @code_points = unpack('U0U*', $_[0]);
    $_[0] = pack('U*', @code_points);
}
# Rewrite the caller's scalar in place (through the $_[0] alias) by
# unpacking it with 'C*' and packing the resulting list with 'C*' again.
# The freshly assigned value is also the return value.
# NOTE(review): presumably this yields a plain byte string on the Perl
# this compat layer targets -- confirm against that version's pack docs.
sub _utf8_off {
    my @octets = unpack('C*', $_[0]);
    $_[0] = pack('C*', @octets);
}
# Heuristic check: returns 1 when the first argument contains at least one
# character whose ordinal is above 255, and 0 otherwise.  A string made up
# only of characters in the 0..255 range therefore always reports 0.
sub is_utf8 {
    # XXX: got any better ideas?
    use utf8;
    my @wide_chars = grep { ord($_) > 255 } split(//, $_[0]);
    return @wide_chars ? 1 : 0;
}
1;

View File

@@ -0,0 +1,330 @@
# $File: //member/autrijus/.vimrc $ $Author: autrijus $
# $Revision: #1 $ $Change: 1649 $ $DateTime: 2002/10/24 15:21:23 $
package Encode::compat::Alias;
our $VERSION = '0.05';
1;
package Encode::Alias;
use strict;
our $VERSION = '0.05';
# When true, find_alias() and define_alias() trace their work with warn().
our $DEBUG = 0;
use base qw(Exporter);
# Public, encouraged API is exported by default
our @EXPORT =
qw (
define_alias
find_alias
);
# Rule table stored flat as (alias, value, alias, value, ...).
# define_alias() unshifts new pairs onto the front and find_alias() scans
# from index 0, so the most recently defined rule is tried first.
our @Alias; # ordered matching list
# Lookup cache: requested name => whatever find_alias() resolved it to.
# The value is undef while a lookup is in progress or when nothing matched.
our %Alias; # cached known aliases
# Resolve an encoding name through the alias rules, caching the outcome.
#
# Called as a class method: the invocant is only used in the debug trace,
# the second argument is the name to look up (it is placed in a localized
# $_ for the duration of the call).  Rules are read pairwise from @Alias
# (alias => value); the first rule that produces a usable encoding wins:
#   * qr// alias  - on a match the value string is eval'ed, so it may
#                   refer to $1 and friends;
#   * code alias  - called with the value as its only argument while $_
#                   holds the name being looked up;
#   * other alias - compared case-insensitively with the name; the value
#                   is used as is.
# A candidate that is not a reference is handed to Encode::find_encoding().
# Returns the cached entry for the name, which is undef when no rule
# produced an encoding.
sub find_alias
{
    my $class = shift;
    local $_ = shift;
    if (!exists $Alias{$_})
    {
        $Alias{$_} = undef;    # Recursion guard
        my $i = 0;
        while ($i < @Alias)
        {
            my ($alias, $val) = @Alias[$i, $i + 1];
            my $new;
            if (ref($alias) eq 'Regexp' && $_ =~ $alias)
            {
                # The eval has to stay inside this branch: the capture
                # variables set by the match above must still be live.
                $DEBUG and warn "eval $val";
                $new = eval $val;
                # $@ and warn "$val, $@";
            }
            elsif (ref($alias) eq 'CODE')
            {
                $DEBUG and warn "$alias", "->", "($val)";
                $new = $alias->($val);
            }
            elsif (lc($_) eq lc($alias))
            {
                $new = $val;
            }

            next unless defined($new);
            next if $new eq $_;    # avoid (direct) recursion on bugs
            $DEBUG and warn "$alias, $new";
            my $enc = ref($new) ? $new : Encode::find_encoding($new);
            next unless $enc;

            # First usable hit wins; remember it and stop scanning.
            $Alias{$_} = $enc;
            last;
        }
        continue
        {
            $i += 2;    # rules are stored as consecutive pairs
        }
    }
    if ($DEBUG)
    {
        my $hit  = $Alias{$_};
        my $name = $hit ? $hit->name : "";
        warn "find_alias($class, $_)->name = $name";
    }
    return $Alias{$_};
}
# Register one or more alias rules.
#
# Arguments are consumed pairwise as (alias => name).  Each pair is put at
# the FRONT of @Alias, so a newer rule takes precedence over older ones.
# An alias may be a qr// object, a code reference or a plain string; see
# find_alias() for how each kind is matched.
#
# After a rule is added, every entry of the %Alias lookup cache that the
# new rule could answer is dropped so that the next find_alias() call
# re-evaluates it:
#   * qr// alias  - cached names matching the pattern;
#   * code alias  - cached names for which the callback returns a defined
#                   value.  The callback is invoked the same way
#                   find_alias() invokes it: $_ holds the name and the
#                   rule's value is the only argument;
#   * plain alias - cached names equal to the alias ignoring case, since
#                   find_alias() compares plain aliases case-insensitively.
# Any other kind of reference leaves the cache untouched.
sub define_alias
{
    while (@_)
    {
        my ($alias, $name) = splice(@_, 0, 2);
        unshift(@Alias, $alias => $name);    # newer one has precedence

        # clear %Alias cache to allow overrides
        my @cached = keys %Alias;    # snapshot: we delete while iterating
        if (ref($alias) eq 'Regexp')
        {
            for my $k (@cached)
            {
                next unless $k =~ $alias;
                $DEBUG and warn "delete \$Alias\{$k\}";
                delete $Alias{$k};
            }
        }
        elsif (ref($alias) eq 'CODE')
        {
            # foreach aliases $_ to each cached name, which is what the
            # callback inspects; it only ever sees our private snapshot.
            for (@cached)
            {
                next unless defined $alias->($name);
                $DEBUG and warn "delete \$Alias\{$_\}";
                delete $Alias{$_};
            }
        }
        elsif (!ref($alias))
        {
            $DEBUG and warn "delete \$Alias\{$alias\}";
            my $folded = lc($alias);
            for my $k (@cached)
            {
                delete $Alias{$k} if lc($k) eq $folded;
            }
        }
    }
}
# Allow latin-1 style names as well
# Element N is the ISO-8859 part number substituted for "latinN" by the
# latin rule registered in init_aliases().  The ruler shows the indices.
#                  0  1  2  3  4  5  6   7   8   9   10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
# Allow winlatin1 style names as well
# Maps the word following "win" (lower-cased by the rule in init_aliases())
# to the code page number that is appended to "cp".
our %Winlatin2cp = (
'latin1' => 1252,
'latin2' => 1250,
'cyrillic' => 1251,
'greek' => 1253,
'turkish' => 1254,
'hebrew' => 1255,
'arabic' => 1256,
'baltic' => 1257,
'vietnamese' => 1258,
);
# Install the default rule set as soon as the module is loaded.
init_aliases();
# Forget every alias rule and every cached lookup result.
sub undef_aliases {
    # A single list assignment empties both the rule list and the cache.
    (@Alias, %Alias) = ();
}
# Restore the factory alias rules.
#
# Clears the rule list and the lookup cache via undef_aliases(), then
# registers the built-in rules with define_alias().  define_alias()
# prepends, so rules registered LATER in this sub are tried FIRST by
# find_alias(); the catch-all lower-casing rule registered first is
# therefore the last resort.
#
# For qr// rules the value is a string of Perl code that find_alias()
# evaluates after a successful match, which is why every such value
# carries its own inner double quotes.
sub init_aliases
{
    undef_aliases();

    # Try all-lower-case version should all else fail
    define_alias( qr/^(.*)$/ => '"\L$1"' );

    # UTF/UCS stuff
    define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
    define_alias( qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
                  qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
                  qr/^iso-10646-1$/i      => '"UCS-2BE"' );
    define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"',
                  qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"',
                  qr/^UTF(16|32)$/i     => '"UTF-$1"',
                );

    # ASCII
    define_alias(qr/^(?:US-?)ascii$/i => '"ascii"');
    define_alias('C' => 'ascii');
    define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"');

    # Allow variants of iso-8859-1 etc.
    define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );

    # At least HP-UX has these.
    define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );

    # More HP stuff.
    define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );

    # The Official name of ASCII.
    define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );

    # This is a font issue, not an encoding issue.
    # (The currency symbol of the Latin 1 upper half
    # has been redefined as the euro symbol.)
    define_alias( qr/^(.+)\@euro$/i => '"$1"' );

    define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
        => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );

    # The list of names mirrors the keys of %Winlatin2cp.
    define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
                           hebrew|arabic|vietnamese)$/ix =>
        '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );

    # Common names for non-latin preferred MIME names
    define_alias( 'ascii'    => 'US-ascii',
                  'cyrillic' => 'iso-8859-5',
                  'arabic'   => 'iso-8859-6',
                  'greek'    => 'iso-8859-7',
                  'hebrew'   => 'iso-8859-8',
                  'thai'     => 'iso-8859-11',
                  'tis620'   => 'iso-8859-11',
                );

    # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
    # And Microsoft has their own naming (again, surprisingly).
    # And windows-* is registered in IANA!
    define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"');

    # Sometimes seen with a leading zero.
    # define_alias( qr/\bcp037\b/i => '"cp37"');

    # Mac Mappings
    # predefined in *.ucm; unneeded
    # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
    define_alias( qr/^mac_(.*)$/i => '"mac$1"');
    # Ououououou. gone. They are different!
    # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');

    # Standardize on the dashed versions.
    # define_alias( qr/\butf8$/i => 'utf-8' );
    # The value of a qr// rule is eval'ed by find_alias(), so the name has
    # to be a quoted string inside the string; a bare koi8-r would be
    # parsed as Perl code and never yield an encoding name.
    define_alias( qr/\bkoi8r$/i => '"koi8-r"' );
    define_alias( qr/\bkoi8u$/i => '"koi8-u"' );

    unless ($Encode::ON_EBCDIC){
        # for Encode::CN
        define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
        define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
        # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
        # CP936 doesn't have vendor-addon for GBK, so they're identical.
        define_alias( qr/^gbk$/i => '"cp936"');
        # This fixes gb2312 vs. euc-cn confusion, practically
        define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' );

        # for Encode::JP
        define_alias( qr/\bjis$/i => '"7bit-jis"' );
        define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
        define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
        define_alias( qr/\bujis$/i => '"euc-jp"' );
        define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
        define_alias( qr/\bsjis$/i => '"shiftjis"' );

        # for Encode::KR
        define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
        define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
        # This fixes ksc5601 vs. euc-kr confusion, practically
        define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
        define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
        define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );

        # for Encode::TW
        define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
        define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
        define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
        define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
        define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
    }

    # utf8 is blessed :)
    define_alias( qr/^UTF-8$/i => '"utf8"',);

    # At last, Map white space and _ to '-'
    define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
}
1;
__END__
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
# TODO: Armenian encoding ARMSCII-8
# TODO: Hebrew encoding ISO-8859-8-1
# TODO: Thai encoding TCVN
# TODO: Vietnamese encodings VPS
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
# Kannada Khmer Korean Laotian Malayalam Mongolian
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
=head1 NAME
Encode::Alias - alias definitions to encodings
=head1 SYNOPSIS
use Encode;
use Encode::Alias;
define_alias( newName => ENCODING);
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently I<newName> can be specified in the following ways:
=over 4
=item As a simple string.
=item As a qr// compiled regular expression, e.g.:
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
in order to allow C<$1> etc. to be substituted. The example is one
way to alias names as used in X11 fonts to the MIME names for the
iso-8859-* family. Note the double quotes inside the single quotes.
If you are using a regex here, you have to use the quotes as shown or
it won't work. Also note that regex handling is tricky even for the
experienced. Use it with caution.
=item As a code reference, e.g.:
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
In this case, C<$_> will be set to the name that is being looked up and
I<ENCODING> is passed to the sub as its first argument. The example
is another way to alias names as used in X11 fonts to the MIME names
for the iso-8859-* family.
=back
=head2 Alias overloading
You can override predefined aliases by simply applying define_alias().
The new alias is always evaluated first, and when necessary,
define_alias() flushes the internal cache to make the new definition
available.
# redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
# superset of SHIFT_JIS
define_alias( qr/shift.*jis$/i => '"cp932"' );
define_alias( qr/sjis$/i => '"cp932"' );
If you want to zap all predefined aliases, you can use
Encode::Alias->undef_aliases;
to do so. And
Encode::Alias->init_aliases;
gets the factory settings back.
=head1 SEE ALSO
L<Encode>, L<Encode::Supported>
=cut

View File

@@ -0,0 +1,127 @@
# $File: //member/autrijus/Encode-compat/lib/Encode/compat/common.pm $ $Author: autrijus $
# $Revision: #7 $ $Change: 10024 $ $DateTime: 2004/02/13 21:42:35 $
package Encode::compat::common;
our $VERSION = '0.06';
1;
package Encode;
use strict;
our $VERSION = '0.06';
# Names exported by default.
our @EXPORT = qw(
decode decode_utf8 encode encode_utf8
encodings find_encoding
);
# Individual fallback flag bits.
use constant DIE_ON_ERR => 1;
use constant WARN_ON_ERR => 2;
use constant RETURN_ON_ERR => 4;
use constant LEAVE_SRC => 8;
use constant PERLQQ => 256;
use constant HTMLCREF => 512;
use constant XMLCREF => 1024;
# Named fallback modes.  By value, FB_CROAK equals DIE_ON_ERR, FB_QUIET
# equals RETURN_ON_ERR, FB_WARN equals WARN_ON_ERR | RETURN_ON_ERR, and the
# last three equal PERLQQ, HTMLCREF and XMLCREF.
# Of all these constants, the code visible in this file only consults
# FB_HTMLCREF (in from_to); the rest are provided for callers to import.
use constant FB_DEFAULT => 0;
use constant FB_CROAK => 1;
use constant FB_QUIET => 4;
use constant FB_WARN => 6;
use constant FB_PERLQQ => 256;
use constant FB_HTMLCREF => 512;
use constant FB_XMLCREF => 1024;
# Constant names grouped for the export lists below.
our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
PERLQQ HTMLCREF XMLCREF);
our @FB_CONSTS = qw(FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
FB_PERLQQ FB_HTMLCREF FB_XMLCREF);
# Names exported on request.
# NOTE(review): define_encoding, is_16bit, is_8bit, perlio_ok,
# resolve_alias, utf8_downgrade and utf8_upgrade are listed here but no
# definition is visible in this file -- confirm they exist elsewhere.
our @EXPORT_OK =
(
qw(
_utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
),
@FB_FLAGS, @FB_CONSTS,
);
# Export tags: everything, the FB_* modes only, or modes plus flag bits.
our %EXPORT_TAGS =
(
all => [ @EXPORT, @EXPORT_OK ],
fallbacks => [ @FB_CONSTS ],
fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);
# Convert the string in $_[0] IN PLACE from encoding $_[1] to encoding
# $_[2]; the optional $_[3] is a fallback mode.  The new value of $_[0] is
# also the return value.
#
# Only one fallback is honoured: FB_HTMLCREF together with a target that
# is spelled exactly 'latin1' (ignoring case).  In that case the input is
# decoded and every character with an ordinal of 128 or more is replaced
# by a decimal '&#NNN;' reference.  Every other call goes straight through
# _convert() and the fallback argument is ignored.
sub from_to ($$$;$) {
    use utf8;
    # XXX: bad hack
    my $cref_into_latin1 =
        ($_[3] and $_[3] == FB_HTMLCREF() and lc($_[2]) eq 'latin1');

    # Common case first: plain conversion.
    unless ($cref_into_latin1) {
        return $_[0] = _convert(@_[0..2]);
    }

    my @pieces = map {
        ord($_) < 128
            ? $_ : '&#' . ord($_) . ';'
    } split(//, decode($_[1], $_[0]));
    return $_[0] = join('', @pieces);
}
# List the encodings that alias lookups have resolved so far.
#
# The names are taken from the values of the Encode::Alias lookup cache.
# That cache stores undef for names that failed to resolve and can map
# many aliases to the same encoding, so undefined entries are skipped and
# duplicates collapsed.  Returns the sorted names in list context and
# their count in scalar context.
sub encodings {
    # XXX: revisit
    require Encode::Alias;
    my %seen;
    my @names = sort
        grep { defined($_) && !$seen{$_}++ }
        values %Encode::Alias::Alias;
    # Return an array rather than "return sort ...": sort has no defined
    # result in scalar context.
    return @names;
}
# Compatibility stub: no lookup is performed, the first argument is
# handed back unchanged.
sub find_encoding {
    my ($name) = @_;
    return $name;
}
# Shorthand for decode() with the encoding fixed to "utf-8"; all arguments
# are forwarded after the encoding name.
sub decode_utf8($;$) {
    my @forwarded = @_;
    return decode("utf-8", @forwarded);
}
# Shorthand for encode() with the encoding fixed to "utf-8"; all arguments
# are forwarded after the encoding name.
sub encode_utf8($;$) {
    my @forwarded = @_;
    return encode("utf-8", @forwarded);
}
# decode(ENCODING, STRING [, CHECK])
# Returns STRING converted from ENCODING to UTF-8, after passing the
# result through _utf8_on().  When the encoding name contains "utf8" or
# "utf-8" (any case) the conversion step is skipped and a copy of the
# input is used.  The third argument is accepted but not used.
sub decode($$;$) {
    my $text;
    if ($_[0] =~ /utf-?8/i) {
        $text = $_[1];
    }
    else {
        $text = _convert($_[1], $_[0] => 'utf-8');
    }
    _utf8_on($text);
    return $text;
}
# encode(ENCODING, STRING [, CHECK])
# Returns STRING converted from UTF-8 to ENCODING, after passing the
# result through _utf8_off().  When the encoding name contains "utf8" or
# "utf-8" (any case) the conversion step is skipped and a copy of the
# input is used.  The third argument is accepted but not used.
sub encode($$;$) {
    my $octets;
    if ($_[0] =~ /utf-?8/i) {
        $octets = $_[1];
    }
    else {
        $octets = _convert($_[1], 'utf-8' => $_[0]);
    }
    _utf8_off($octets);
    return $octets;
}
{
    # Text::Iconv converter objects, created on first use and kept for
    # later calls; keyed on the (from, to) pair of resolved names.
    my %decoder;

    # _convert(STRING, FROM, TO)
    # Returns STRING converted from encoding FROM to encoding TO using
    # Text::Iconv (with its raise_error setting switched on).  Both names
    # are resolved through Encode::Alias->find_alias(), falling back to
    # the lower-cased name, and then adjusted: "utf8" becomes "utf-8" and
    # "big5-eten" becomes "big5".  When both resolve to the same name the
    # input is returned without conversion.
    sub _convert {
        require Text::Iconv;
        Text::Iconv->raise_error(1);
        require Encode::Alias;

        # Resolve both names first (with $_ holding the name, as before) ...
        my @resolved = map {
            Encode::Alias->find_alias($_) || lc($_)
        } ($_[1], $_[2]);

        # ... then rewrite the two spellings that need adjusting.
        for my $name (@resolved) {
            $name =~ s/^utf8$/utf-8/i;
            $name =~ s/^big5-eten$/big5/i;
        }
        my ($from, $to) = @resolved;

        # Nothing to do when source and target are the same.
        return $_[0] if $from eq $to;

        my $converter = ($decoder{$from, $to} ||= Text::Iconv->new( $from, $to ));
        return $converter->convert($_[0]);
    }
}
1;