Initial Commit
This commit is contained in:
732
database/perl/lib/Pod/Escapes.pm
Normal file
732
database/perl/lib/Pod/Escapes.pm
Normal file
@@ -0,0 +1,732 @@
|
||||
package Pod::Escapes;
|
||||
use strict;
|
||||
use warnings;
|
||||
use 5.006;
|
||||
|
||||
use vars qw(
|
||||
%Code2USASCII
|
||||
%Name2character
|
||||
%Name2character_number
|
||||
%Latin1Code_to_fallback
|
||||
%Latin1Char_to_fallback
|
||||
$FAR_CHAR
|
||||
$FAR_CHAR_NUMBER
|
||||
$NOT_ASCII
|
||||
@ISA $VERSION @EXPORT_OK %EXPORT_TAGS
|
||||
);
|
||||
|
||||
require Exporter;
|
||||
@ISA = ('Exporter');
|
||||
$VERSION = '1.07';
|
||||
@EXPORT_OK = qw(
|
||||
%Code2USASCII
|
||||
%Name2character
|
||||
%Name2character_number
|
||||
%Latin1Code_to_fallback
|
||||
%Latin1Char_to_fallback
|
||||
e2char
|
||||
e2charnum
|
||||
);
|
||||
%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
|
||||
|
||||
#==========================================================================
|
||||
|
||||
$FAR_CHAR = "?" unless defined $FAR_CHAR;
|
||||
$FAR_CHAR_NUMBER = ord($FAR_CHAR) unless defined $FAR_CHAR_NUMBER;
|
||||
|
||||
$NOT_ASCII = 'A' ne chr(65) unless defined $NOT_ASCII;
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
sub e2char {
|
||||
my $in = $_[0];
|
||||
return undef unless defined $in and length $in;
|
||||
|
||||
# Convert to decimal:
|
||||
if($in =~ m/^(0[0-7]*)$/s ) {
|
||||
$in = oct $in;
|
||||
} elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
|
||||
$in = hex $1;
|
||||
} # else it's decimal, or named
|
||||
|
||||
if($in =~ m/^\d+$/s) {
|
||||
if($] < 5.007 and $in > 255) { # can't be trusted with Unicode
|
||||
return $FAR_CHAR;
|
||||
} elsif ($] >= 5.007003) {
|
||||
return chr(utf8::unicode_to_native($in));
|
||||
} elsif ($NOT_ASCII) {
|
||||
return $Code2USASCII{$in} # so "65" => "A" everywhere
|
||||
|| $Latin1Code_to_fallback{$in} # Fallback.
|
||||
|| $FAR_CHAR; # Fall further back
|
||||
} else {
|
||||
return chr($in);
|
||||
}
|
||||
} else {
|
||||
return $Name2character{$in}; # returns undef if unknown
|
||||
}
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
sub e2charnum {
|
||||
my $in = $_[0];
|
||||
return undef unless defined $in and length $in;
|
||||
|
||||
# Convert to decimal:
|
||||
if($in =~ m/^(0[0-7]*)$/s ) {
|
||||
$in = oct $in;
|
||||
} elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
|
||||
$in = hex $1;
|
||||
} # else it's decimal, or named
|
||||
|
||||
if($in =~ m/^[0-9]+$/s) {
|
||||
return 0 + $in;
|
||||
} else {
|
||||
return $Name2character_number{$in}; # returns undef if unknown
|
||||
}
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
%Code2USASCII = (
|
||||
# mostly generated by
|
||||
# perl -e "printf qq{ \x25 3s, '\x25s',\n}, $_, chr($_) foreach (32 .. 126)"
|
||||
32, ' ',
|
||||
33, '!',
|
||||
34, '"',
|
||||
35, '#',
|
||||
36, '$',
|
||||
37, '%',
|
||||
38, '&',
|
||||
39, "'", #!
|
||||
40, '(',
|
||||
41, ')',
|
||||
42, '*',
|
||||
43, '+',
|
||||
44, ',',
|
||||
45, '-',
|
||||
46, '.',
|
||||
47, '/',
|
||||
48, '0',
|
||||
49, '1',
|
||||
50, '2',
|
||||
51, '3',
|
||||
52, '4',
|
||||
53, '5',
|
||||
54, '6',
|
||||
55, '7',
|
||||
56, '8',
|
||||
57, '9',
|
||||
58, ':',
|
||||
59, ';',
|
||||
60, '<',
|
||||
61, '=',
|
||||
62, '>',
|
||||
63, '?',
|
||||
64, '@',
|
||||
65, 'A',
|
||||
66, 'B',
|
||||
67, 'C',
|
||||
68, 'D',
|
||||
69, 'E',
|
||||
70, 'F',
|
||||
71, 'G',
|
||||
72, 'H',
|
||||
73, 'I',
|
||||
74, 'J',
|
||||
75, 'K',
|
||||
76, 'L',
|
||||
77, 'M',
|
||||
78, 'N',
|
||||
79, 'O',
|
||||
80, 'P',
|
||||
81, 'Q',
|
||||
82, 'R',
|
||||
83, 'S',
|
||||
84, 'T',
|
||||
85, 'U',
|
||||
86, 'V',
|
||||
87, 'W',
|
||||
88, 'X',
|
||||
89, 'Y',
|
||||
90, 'Z',
|
||||
91, '[',
|
||||
92, "\\", #!
|
||||
93, ']',
|
||||
94, '^',
|
||||
95, '_',
|
||||
96, '`',
|
||||
97, 'a',
|
||||
98, 'b',
|
||||
99, 'c',
|
||||
100, 'd',
|
||||
101, 'e',
|
||||
102, 'f',
|
||||
103, 'g',
|
||||
104, 'h',
|
||||
105, 'i',
|
||||
106, 'j',
|
||||
107, 'k',
|
||||
108, 'l',
|
||||
109, 'm',
|
||||
110, 'n',
|
||||
111, 'o',
|
||||
112, 'p',
|
||||
113, 'q',
|
||||
114, 'r',
|
||||
115, 's',
|
||||
116, 't',
|
||||
117, 'u',
|
||||
118, 'v',
|
||||
119, 'w',
|
||||
120, 'x',
|
||||
121, 'y',
|
||||
122, 'z',
|
||||
123, '{',
|
||||
124, '|',
|
||||
125, '}',
|
||||
126, '~',
|
||||
);
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
%Latin1Code_to_fallback = ();
|
||||
@Latin1Code_to_fallback{0xA0 .. 0xFF} = (
|
||||
# Copied from Text/Unidecode/x00.pm:
|
||||
|
||||
' ', qq{!}, qq{C/}, 'PS', qq{\$?}, qq{Y=}, qq{|}, 'SS', qq{"}, qq{(c)}, 'a', qq{<<}, qq{!}, "", qq{(r)}, qq{-},
|
||||
'deg', qq{+-}, '2', '3', qq{'}, 'u', 'P', qq{*}, qq{,}, '1', 'o', qq{>>}, qq{1/4}, qq{1/2}, qq{3/4}, qq{?},
|
||||
'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
|
||||
'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss',
|
||||
'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
||||
'd', 'n', 'o', 'o', 'o', 'o', 'o', qq{/}, 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
|
||||
|
||||
);
|
||||
|
||||
{
|
||||
# Now stuff %Latin1Char_to_fallback:
|
||||
%Latin1Char_to_fallback = ();
|
||||
my($k,$v);
|
||||
while( ($k,$v) = each %Latin1Code_to_fallback) {
|
||||
$Latin1Char_to_fallback{chr $k} = $v;
|
||||
#print chr($k), ' => ', $v, "\n";
|
||||
}
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
%Name2character_number = (
|
||||
# General XML/XHTML:
|
||||
'lt' => 60,
|
||||
'gt' => 62,
|
||||
'quot' => 34,
|
||||
'amp' => 38,
|
||||
'apos' => 39,
|
||||
|
||||
# POD-specific:
|
||||
'sol' => 47,
|
||||
'verbar' => 124,
|
||||
|
||||
'lchevron' => 171, # legacy for laquo
|
||||
'rchevron' => 187, # legacy for raquo
|
||||
|
||||
# Remember, grave looks like \ (as in virtu\)
|
||||
# acute looks like / (as in re/sume/)
|
||||
# circumflex looks like ^ (as in papier ma^che/)
|
||||
# umlaut/dieresis looks like " (as in nai"ve, Chloe")
|
||||
|
||||
# From the XHTML 1 .ent files:
|
||||
'nbsp' , 160,
|
||||
'iexcl' , 161,
|
||||
'cent' , 162,
|
||||
'pound' , 163,
|
||||
'curren' , 164,
|
||||
'yen' , 165,
|
||||
'brvbar' , 166,
|
||||
'sect' , 167,
|
||||
'uml' , 168,
|
||||
'copy' , 169,
|
||||
'ordf' , 170,
|
||||
'laquo' , 171,
|
||||
'not' , 172,
|
||||
'shy' , 173,
|
||||
'reg' , 174,
|
||||
'macr' , 175,
|
||||
'deg' , 176,
|
||||
'plusmn' , 177,
|
||||
'sup2' , 178,
|
||||
'sup3' , 179,
|
||||
'acute' , 180,
|
||||
'micro' , 181,
|
||||
'para' , 182,
|
||||
'middot' , 183,
|
||||
'cedil' , 184,
|
||||
'sup1' , 185,
|
||||
'ordm' , 186,
|
||||
'raquo' , 187,
|
||||
'frac14' , 188,
|
||||
'frac12' , 189,
|
||||
'frac34' , 190,
|
||||
'iquest' , 191,
|
||||
'Agrave' , 192,
|
||||
'Aacute' , 193,
|
||||
'Acirc' , 194,
|
||||
'Atilde' , 195,
|
||||
'Auml' , 196,
|
||||
'Aring' , 197,
|
||||
'AElig' , 198,
|
||||
'Ccedil' , 199,
|
||||
'Egrave' , 200,
|
||||
'Eacute' , 201,
|
||||
'Ecirc' , 202,
|
||||
'Euml' , 203,
|
||||
'Igrave' , 204,
|
||||
'Iacute' , 205,
|
||||
'Icirc' , 206,
|
||||
'Iuml' , 207,
|
||||
'ETH' , 208,
|
||||
'Ntilde' , 209,
|
||||
'Ograve' , 210,
|
||||
'Oacute' , 211,
|
||||
'Ocirc' , 212,
|
||||
'Otilde' , 213,
|
||||
'Ouml' , 214,
|
||||
'times' , 215,
|
||||
'Oslash' , 216,
|
||||
'Ugrave' , 217,
|
||||
'Uacute' , 218,
|
||||
'Ucirc' , 219,
|
||||
'Uuml' , 220,
|
||||
'Yacute' , 221,
|
||||
'THORN' , 222,
|
||||
'szlig' , 223,
|
||||
'agrave' , 224,
|
||||
'aacute' , 225,
|
||||
'acirc' , 226,
|
||||
'atilde' , 227,
|
||||
'auml' , 228,
|
||||
'aring' , 229,
|
||||
'aelig' , 230,
|
||||
'ccedil' , 231,
|
||||
'egrave' , 232,
|
||||
'eacute' , 233,
|
||||
'ecirc' , 234,
|
||||
'euml' , 235,
|
||||
'igrave' , 236,
|
||||
'iacute' , 237,
|
||||
'icirc' , 238,
|
||||
'iuml' , 239,
|
||||
'eth' , 240,
|
||||
'ntilde' , 241,
|
||||
'ograve' , 242,
|
||||
'oacute' , 243,
|
||||
'ocirc' , 244,
|
||||
'otilde' , 245,
|
||||
'ouml' , 246,
|
||||
'divide' , 247,
|
||||
'oslash' , 248,
|
||||
'ugrave' , 249,
|
||||
'uacute' , 250,
|
||||
'ucirc' , 251,
|
||||
'uuml' , 252,
|
||||
'yacute' , 253,
|
||||
'thorn' , 254,
|
||||
'yuml' , 255,
|
||||
|
||||
'fnof' , 402,
|
||||
'Alpha' , 913,
|
||||
'Beta' , 914,
|
||||
'Gamma' , 915,
|
||||
'Delta' , 916,
|
||||
'Epsilon' , 917,
|
||||
'Zeta' , 918,
|
||||
'Eta' , 919,
|
||||
'Theta' , 920,
|
||||
'Iota' , 921,
|
||||
'Kappa' , 922,
|
||||
'Lambda' , 923,
|
||||
'Mu' , 924,
|
||||
'Nu' , 925,
|
||||
'Xi' , 926,
|
||||
'Omicron' , 927,
|
||||
'Pi' , 928,
|
||||
'Rho' , 929,
|
||||
'Sigma' , 931,
|
||||
'Tau' , 932,
|
||||
'Upsilon' , 933,
|
||||
'Phi' , 934,
|
||||
'Chi' , 935,
|
||||
'Psi' , 936,
|
||||
'Omega' , 937,
|
||||
'alpha' , 945,
|
||||
'beta' , 946,
|
||||
'gamma' , 947,
|
||||
'delta' , 948,
|
||||
'epsilon' , 949,
|
||||
'zeta' , 950,
|
||||
'eta' , 951,
|
||||
'theta' , 952,
|
||||
'iota' , 953,
|
||||
'kappa' , 954,
|
||||
'lambda' , 955,
|
||||
'mu' , 956,
|
||||
'nu' , 957,
|
||||
'xi' , 958,
|
||||
'omicron' , 959,
|
||||
'pi' , 960,
|
||||
'rho' , 961,
|
||||
'sigmaf' , 962,
|
||||
'sigma' , 963,
|
||||
'tau' , 964,
|
||||
'upsilon' , 965,
|
||||
'phi' , 966,
|
||||
'chi' , 967,
|
||||
'psi' , 968,
|
||||
'omega' , 969,
|
||||
'thetasym' , 977,
|
||||
'upsih' , 978,
|
||||
'piv' , 982,
|
||||
'bull' , 8226,
|
||||
'hellip' , 8230,
|
||||
'prime' , 8242,
|
||||
'Prime' , 8243,
|
||||
'oline' , 8254,
|
||||
'frasl' , 8260,
|
||||
'weierp' , 8472,
|
||||
'image' , 8465,
|
||||
'real' , 8476,
|
||||
'trade' , 8482,
|
||||
'alefsym' , 8501,
|
||||
'larr' , 8592,
|
||||
'uarr' , 8593,
|
||||
'rarr' , 8594,
|
||||
'darr' , 8595,
|
||||
'harr' , 8596,
|
||||
'crarr' , 8629,
|
||||
'lArr' , 8656,
|
||||
'uArr' , 8657,
|
||||
'rArr' , 8658,
|
||||
'dArr' , 8659,
|
||||
'hArr' , 8660,
|
||||
'forall' , 8704,
|
||||
'part' , 8706,
|
||||
'exist' , 8707,
|
||||
'empty' , 8709,
|
||||
'nabla' , 8711,
|
||||
'isin' , 8712,
|
||||
'notin' , 8713,
|
||||
'ni' , 8715,
|
||||
'prod' , 8719,
|
||||
'sum' , 8721,
|
||||
'minus' , 8722,
|
||||
'lowast' , 8727,
|
||||
'radic' , 8730,
|
||||
'prop' , 8733,
|
||||
'infin' , 8734,
|
||||
'ang' , 8736,
|
||||
'and' , 8743,
|
||||
'or' , 8744,
|
||||
'cap' , 8745,
|
||||
'cup' , 8746,
|
||||
'int' , 8747,
|
||||
'there4' , 8756,
|
||||
'sim' , 8764,
|
||||
'cong' , 8773,
|
||||
'asymp' , 8776,
|
||||
'ne' , 8800,
|
||||
'equiv' , 8801,
|
||||
'le' , 8804,
|
||||
'ge' , 8805,
|
||||
'sub' , 8834,
|
||||
'sup' , 8835,
|
||||
'nsub' , 8836,
|
||||
'sube' , 8838,
|
||||
'supe' , 8839,
|
||||
'oplus' , 8853,
|
||||
'otimes' , 8855,
|
||||
'perp' , 8869,
|
||||
'sdot' , 8901,
|
||||
'lceil' , 8968,
|
||||
'rceil' , 8969,
|
||||
'lfloor' , 8970,
|
||||
'rfloor' , 8971,
|
||||
'lang' , 9001,
|
||||
'rang' , 9002,
|
||||
'loz' , 9674,
|
||||
'spades' , 9824,
|
||||
'clubs' , 9827,
|
||||
'hearts' , 9829,
|
||||
'diams' , 9830,
|
||||
'OElig' , 338,
|
||||
'oelig' , 339,
|
||||
'Scaron' , 352,
|
||||
'scaron' , 353,
|
||||
'Yuml' , 376,
|
||||
'circ' , 710,
|
||||
'tilde' , 732,
|
||||
'ensp' , 8194,
|
||||
'emsp' , 8195,
|
||||
'thinsp' , 8201,
|
||||
'zwnj' , 8204,
|
||||
'zwj' , 8205,
|
||||
'lrm' , 8206,
|
||||
'rlm' , 8207,
|
||||
'ndash' , 8211,
|
||||
'mdash' , 8212,
|
||||
'lsquo' , 8216,
|
||||
'rsquo' , 8217,
|
||||
'sbquo' , 8218,
|
||||
'ldquo' , 8220,
|
||||
'rdquo' , 8221,
|
||||
'bdquo' , 8222,
|
||||
'dagger' , 8224,
|
||||
'Dagger' , 8225,
|
||||
'permil' , 8240,
|
||||
'lsaquo' , 8249,
|
||||
'rsaquo' , 8250,
|
||||
'euro' , 8364,
|
||||
);
|
||||
|
||||
|
||||
# Fill out %Name2character...
|
||||
{
|
||||
%Name2character = ();
|
||||
my($name, $number);
|
||||
while( ($name, $number) = each %Name2character_number) {
|
||||
if($] < 5.007 and $number > 255) {
|
||||
$Name2character{$name} = $FAR_CHAR;
|
||||
# substitute for Unicode characters, for perls
|
||||
# that can't reliably handle them
|
||||
} elsif ($] >= 5.007003) {
|
||||
$Name2character{$name} = chr utf8::unicode_to_native($number);
|
||||
# normal case for more recent Perls where we can translate from Unicode
|
||||
# to the native character set.
|
||||
}
|
||||
elsif (exists $Code2USASCII{$number}) {
|
||||
$Name2character{$name} = $Code2USASCII{$number};
|
||||
# on older Perls, we can use the translations we have hard-coded in this
|
||||
# file, but these don't include the non-ASCII-range characters
|
||||
}
|
||||
elsif ($NOT_ASCII && $number > 127 && $number < 256) {
|
||||
# this range on old non-ASCII-platform perls is wrong
|
||||
if (exists $Latin1Code_to_fallback{$number}) {
|
||||
$Name2character{$name} = $Latin1Code_to_fallback{$number};
|
||||
} else {
|
||||
$Name2character{$name} = $FAR_CHAR;
|
||||
}
|
||||
} else {
|
||||
$Name2character{$name} = chr $number;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Pod::Escapes - for resolving Pod EE<lt>...E<gt> sequences
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use Pod::Escapes qw(e2char);
|
||||
...la la la, parsing POD, la la la...
|
||||
$text = e2char($e_node->label);
|
||||
unless(defined $text) {
|
||||
print "Unknown E sequence \"", $e_node->label, "\"!";
|
||||
}
|
||||
...else print/interpolate $text...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides things that are useful in decoding
|
||||
Pod EE<lt>...E<gt> sequences. Presumably, it should be used
|
||||
only by Pod parsers and/or formatters.
|
||||
|
||||
By default, Pod::Escapes exports none of its symbols. But
|
||||
you can request any of them to be exported.
|
||||
Either request them individually, as with
|
||||
C<use Pod::Escapes qw(symbolname symbolname2...);>,
|
||||
or you can do C<use Pod::Escapes qw(:ALL);> to get all
|
||||
exportable symbols.
|
||||
|
||||
=head1 GOODIES
|
||||
|
||||
=over
|
||||
|
||||
=item e2char($e_content)
|
||||
|
||||
Given a name or number that could appear in a
|
||||
C<EE<lt>name_or_numE<gt>> sequence, this returns the string that
|
||||
it stands for. For example, C<e2char('sol')>, C<e2char('47')>,
|
||||
C<e2char('0x2F')>, and C<e2char('057')> all return "/",
|
||||
because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
|
||||
and C<EE<lt>057E<gt>>, all mean "/". If
|
||||
the name has no known value (as with a name of "qacute") or is
|
||||
syntactically invalid (as with a name of "1/4"), this returns undef.
|
||||
|
||||
=item e2charnum($e_content)
|
||||
|
||||
Given a name or number that could appear in a
|
||||
C<EE<lt>name_or_numE<gt>> sequence, this returns the number of
|
||||
the Unicode character that this stands for. For example,
|
||||
C<e2char('sol')>, C<e2char('47')>,
|
||||
C<e2char('0x2F')>, and C<e2char('057')> all return 47,
|
||||
because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
|
||||
and C<EE<lt>057E<gt>>, all mean "/", whose Unicode number is 47. If
|
||||
the name has no known value (as with a name of "qacute") or is
|
||||
syntactically invalid (as with a name of "1/4"), this returns undef.
|
||||
|
||||
=item $Name2character{I<name>}
|
||||
|
||||
Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
|
||||
to the string that each stands for. Note that this does not
|
||||
include numerics (like "64" or "x981c"). Under old Perl versions
|
||||
(before 5.7) you get a "?" in place of characters whose Unicode
|
||||
value is over 255.
|
||||
|
||||
=item $Name2character_number{I<name>}
|
||||
|
||||
Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
|
||||
to the Unicode value that each stands for. For example,
|
||||
C<$Name2character_number{'eacute'}> is 201, and
|
||||
C<$Name2character_number{'eacute'}> is 8364. You get the correct
|
||||
Unicode value, regardless of the version of Perl you're using --
|
||||
which differs from C<%Name2character>'s behavior under pre-5.7 Perls.
|
||||
|
||||
Note that this hash does not
|
||||
include numerics (like "64" or "x981c").
|
||||
|
||||
=item $Latin1Code_to_fallback{I<integer>}
|
||||
|
||||
For numbers in the range 160 (0x00A0) to 255 (0x00FF), this maps
|
||||
from the character code for a Latin-1 character (like 233 for
|
||||
lowercase e-acute) to the US-ASCII character that best aproximates
|
||||
it (like "e"). You may find this useful if you are rendering
|
||||
POD in a format that you think deals well only with US-ASCII
|
||||
characters.
|
||||
|
||||
=item $Latin1Char_to_fallback{I<character>}
|
||||
|
||||
Just as above, but maps from characters (like "\xE9",
|
||||
lowercase e-acute) to characters (like "e").
|
||||
|
||||
=item $Code2USASCII{I<integer>}
|
||||
|
||||
This maps from US-ASCII codes (like 32) to the corresponding
|
||||
character (like space, for 32). Only characters 32 to 126 are
|
||||
defined. This is meant for use by C<e2char($x)> when it senses
|
||||
that it's running on a non-ASCII platform (where chr(32) doesn't
|
||||
get you a space -- but $Code2USASCII{32} will). It's
|
||||
documented here just in case you might find it useful.
|
||||
|
||||
=back
|
||||
|
||||
=head1 CAVEATS
|
||||
|
||||
On Perl versions before 5.7, Unicode characters with a value
|
||||
over 255 (like lambda or emdash) can't be conveyed. This
|
||||
module does work under such early Perl versions, but in the
|
||||
place of each such character, you get a "?". Latin-1
|
||||
characters (characters 160-255) are unaffected.
|
||||
|
||||
Under EBCDIC platforms, C<e2char($n)> may not always be the
|
||||
same as C<chr(e2charnum($n))>, and ditto for
|
||||
C<$Name2character{$name}> and
|
||||
C<chr($Name2character_number{$name})>, because the strings are returned as
|
||||
native, and the numbers are returned as Unicode.
|
||||
However, for Perls starting with v5.8, C<e2char($n)> is the same as
|
||||
C<chr(utf8::unicode_to_native(e2charnum($n)))>, and ditto for
|
||||
C<$Name2character{$name}> and
|
||||
C<chr(utf8::unicode_to_native($Name2character_number{$name}))>.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Pod::Browser> - a pod web server based on L<Catalyst>.
|
||||
|
||||
L<Pod::Checker> - check pod documents for syntax errors.
|
||||
|
||||
L<Pod::Coverage> - check if the documentation for a module is comprehensive.
|
||||
|
||||
L<perlpod> - description of pod format (for people documenting with pod).
|
||||
|
||||
L<perlpodspec> - specification of pod format (for people processing it).
|
||||
|
||||
L<Text::Unidecode> - ASCII transliteration of Unicode text.
|
||||
|
||||
=head1 REPOSITORY
|
||||
|
||||
L<https://github.com/neilbowers/Pod-Escapes>
|
||||
|
||||
=head1 COPYRIGHT AND DISCLAIMERS
|
||||
|
||||
Copyright (c) 2001-2004 Sean M. Burke. All rights reserved.
|
||||
|
||||
This library is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
without any warranty; without even the implied warranty of
|
||||
merchantability or fitness for a particular purpose.
|
||||
|
||||
Portions of the data tables in this module are derived from the
|
||||
entity declarations in the W3C XHTML specification.
|
||||
|
||||
Currently (October 2001), that's these three:
|
||||
|
||||
http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
|
||||
http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
|
||||
http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Sean M. Burke C<sburke@cpan.org>
|
||||
|
||||
Now being maintained by Neil Bowers E<lt>neilb@cpan.orgE<gt>
|
||||
|
||||
=cut
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# What I used for reading the XHTML .ent files:
|
||||
|
||||
my(@norms, @good, @bad);
|
||||
my $dir = 'c:/sgml/docbook/';
|
||||
my %escapes;
|
||||
foreach my $file (qw(
|
||||
xhtml-symbol.ent
|
||||
xhtml-lat1.ent
|
||||
xhtml-special.ent
|
||||
)) {
|
||||
open(IN, "<$dir$file") or die "can't read-open $dir$file: $!";
|
||||
print "Reading $file...\n";
|
||||
while(<IN>) {
|
||||
if(m/<!ENTITY\s+(\S+)\s+"&#([^;]+);">/) {
|
||||
my($name, $value) = ($1,$2);
|
||||
next if $name eq 'quot' or $name eq 'apos' or $name eq 'gt';
|
||||
|
||||
$value = hex $1 if $value =~ m/^x([a-fA-F0-9]+)$/s;
|
||||
print "ILLEGAL VALUE $value" unless $value =~ m/^\d+$/s;
|
||||
if($value > 255) {
|
||||
push @good , sprintf " %-10s , chr(%s),\n", "'$name'", $value;
|
||||
push @bad , sprintf " %-10s , \$bad,\n", "'$name'", $value;
|
||||
} else {
|
||||
push @norms, sprintf " %-10s , chr(%s),\n", "'$name'", $value;
|
||||
}
|
||||
} elsif(m/<!ENT/) {
|
||||
print "# Skipping $_";
|
||||
}
|
||||
|
||||
}
|
||||
close(IN);
|
||||
}
|
||||
|
||||
print @norms;
|
||||
print "\n ( \$] .= 5.006001 ? (\n";
|
||||
print @good;
|
||||
print " ) : (\n";
|
||||
print @bad;
|
||||
print " )\n);\n";
|
||||
|
||||
__END__
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
Reference in New Issue
Block a user