Initial Commit

2025-12-03 16:38:10 +01:00
parent c5e26bf594
commit b732d8d4b5
17680 changed files with 5977495 additions and 2 deletions
--- a/database/perl/lib/Encode/Unicode/UTF7.pm
+++ b/database/perl/lib/Encode/Unicode/UTF7.pm
@@ -0,0 +1,133 @@
+#
+# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
+#
+package Encode::Unicode::UTF7;
+use strict;
+use warnings;
+use parent qw(Encode::Encoding);
+__PACKAGE__->Define('UTF-7');
+our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+use MIME::Base64;
+use Encode qw(find_encoding);
+
+#
+# Algorithms taken from Unicode::String by Gisle Aas
+#
+
+our $OPTIONAL_DIRECT_CHARS = 1;
+my $specials = quotemeta "\'(),-./:?";
+$OPTIONAL_DIRECT_CHARS
+  and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
+
+# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
+# We use qr/[\n\r\t\ ] instead
+my $re_asis    = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
+my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
+my $e_utf16    = find_encoding("UTF-16BE");
+
+sub needs_lines { 1 }
+
+sub encode($$;$) {
+    my ( $obj, $str, $chk ) = @_;
+    return undef unless defined $str;
+    my $len = length($str);
+    pos($str) = 0;
+    my $bytes = substr($str, 0, 0); # to propagate taintedness
+    while ( pos($str) < $len ) {
+        if ( $str =~ /\G($re_asis+)/ogc ) {
+	    my $octets = $1;
+	    utf8::downgrade($octets);
+	    $bytes .= $octets;
+        }
+        elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
+            if ( $1 eq "+" ) {
+                $bytes .= "+-";
+            }
+            else {
+                my $s = $1;
+                my $base64 = encode_base64( $e_utf16->encode($s), '' );
+                $base64 =~ s/=+$//;
+                $bytes .= "+$base64-";
+            }
+        }
+        else {
+            die "This should not happen! (pos=" . pos($str) . ")";
+        }
+    }
+    $_[1] = '' if $chk;
+    return $bytes;
+}
+
+sub decode($$;$) {
+    use re 'taint';
+    my ( $obj, $bytes, $chk ) = @_;
+    return undef unless defined $bytes;
+    my $len = length($bytes);
+    my $str = substr($bytes, 0, 0); # to propagate taintedness;
+    pos($bytes) = 0;
+    no warnings 'uninitialized';
+    while ( pos($bytes) < $len ) {
+        if ( $bytes =~ /\G([^+]+)/ogc ) {
+            $str .= $1;
+        }
+        elsif ( $bytes =~ /\G\+-/ogc ) {
+            $str .= "+";
+        }
+        elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
+            my $base64 = $1;
+            my $pad    = length($base64) % 4;
+            $base64 .= "=" x ( 4 - $pad ) if $pad;
+            $str .= $e_utf16->decode( decode_base64($base64) );
+        }
+        elsif ( $bytes =~ /\G\+/ogc ) {
+            $^W and warn "Bad UTF7 data escape";
+            $str .= "+";
+        }
+        else {
+            die "This should not happen " . pos($bytes);
+        }
+    }
+    $_[1] = '' if $chk;
+    return $str;
+}
+1;
+__END__
+
+=head1 NAME
+
+Encode::Unicode::UTF7 -- UTF-7 encoding
+
+=head1 SYNOPSIS
+
+    use Encode qw/encode decode/; 
+    $utf7 = encode("UTF-7", $utf8);
+    $utf8 = decode("UTF-7", $ucs2);
+
+=head1 ABSTRACT
+
+This module implements UTF-7 encoding documented in RFC 2152.  UTF-7,
+as its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
+is designed to be MTA-safe and expected to be a standard way to
+exchange Unicoded mails via mails.  But with the advent of UTF-8 and
+8-bit compliant MTAs, UTF-7 is hardly ever used.
+
+UTF-7 was not supported by Encode until version 1.95 because of that.
+But Unicode::String, a module by Gisle Aas which adds Unicode supports
+to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
+so Encode can supersede Unicode::String 100%.
+
+=head1 In Practice
+
+When you want to encode Unicode for mails and web pages, however, do
+not use UTF-7 unless you are sure your recipients and readers can
+handle it.  Very few MUAs and WWW Browsers support these days (only
+Mozilla seems to support one).  For general cases, use UTF-8 for
+message body and MIME-Header for header instead.
+
+=head1 SEE ALSO
+
+L<Encode>, L<Encode::Unicode>, L<Unicode::String>
+
+RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
+
+=cut