[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/Encode/Unicode/ -> UTF7.pm (source)

   1  #
   2  # $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $
   3  #
   4  package Encode::Unicode::UTF7;
   5  use strict;
   6  use warnings;
   7  no warnings 'redefine';
   8  use base qw(Encode::Encoding);
   9  __PACKAGE__->Define('UTF-7');
  10  our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
  11  use MIME::Base64;
  12  use Encode;
  13  
  14  #
  15  # Algorithms taken from Unicode::String by Gisle Aas
  16  #
  17  
  18  our $OPTIONAL_DIRECT_CHARS = 1;
  19  my $specials = quotemeta "\'(),-./:?";
  20  $OPTIONAL_DIRECT_CHARS
  21    and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
  22  
  23  # \s will not work because it matches U+3000 DEOGRAPHIC SPACE
  24  # We use qr/[\n\r\t\ ] instead
  25  my $re_asis    = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
  26  my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
  27  my $e_utf16    = find_encoding("UTF-16BE");
  28  
  29  sub needs_lines { 1 }
  30  
  31  sub encode($$;$) {
  32      my ( $obj, $str, $chk ) = @_;
  33      my $len = length($str);
  34      pos($str) = 0;
  35      my $bytes = '';
  36      while ( pos($str) < $len ) {
  37          if ( $str =~ /\G($re_asis+)/ogc ) {
  38              $bytes .= $1;
  39          }
  40          elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
  41              if ( $1 eq "+" ) {
  42                  $bytes .= "+-";
  43              }
  44              else {
  45                  my $s = $1;
  46                  my $base64 = encode_base64( $e_utf16->encode($s), '' );
  47                  $base64 =~ s/=+$//;
  48                  $bytes .= "+$base64-";
  49              }
  50          }
  51          else {
  52              die "This should not happen! (pos=" . pos($str) . ")";
  53          }
  54      }
  55      $_[1] = '' if $chk;
  56      return $bytes;
  57  }
  58  
  59  sub decode($$;$) {
  60      my ( $obj, $bytes, $chk ) = @_;
  61      my $len = length($bytes);
  62      my $str = "";
  63      no warnings 'uninitialized';
  64      while ( pos($bytes) < $len ) {
  65          if ( $bytes =~ /\G([^+]+)/ogc ) {
  66              $str .= $1;
  67          }
  68          elsif ( $bytes =~ /\G\+-/ogc ) {
  69              $str .= "+";
  70          }
  71          elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
  72              my $base64 = $1;
  73              my $pad    = length($base64) % 4;
  74              $base64 .= "=" x ( 4 - $pad ) if $pad;
  75              $str .= $e_utf16->decode( decode_base64($base64) );
  76          }
  77          elsif ( $bytes =~ /\G\+/ogc ) {
  78              $^W and warn "Bad UTF7 data escape";
  79              $str .= "+";
  80          }
  81          else {
  82              die "This should not happen " . pos($bytes);
  83          }
  84      }
  85      $_[1] = '' if $chk;
  86      return $str;
  87  }
  88  1;
  89  __END__
  90  
  91  =head1 NAME
  92  
  93  Encode::Unicode::UTF7 -- UTF-7 encoding
  94  
  95  =head1 SYNOPSIS
  96  
  97      use Encode qw/encode decode/; 
  98      $utf7 = encode("UTF-7", $utf8);
  99      $utf8 = decode("UTF-7", $ucs2);
 100  
 101  =head1 ABSTRACT
 102  
 103  This module implements UTF-7 encoding documented in RFC 2152.  UTF-7,
 104  as its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
 105  is designed to be MTA-safe and expected to be a standard way to
 106  exchange Unicoded mails via mails.  But with the advent of UTF-8 and
 107  8-bit compliant MTAs, UTF-7 is hardly ever used.
 108  
 109  UTF-7 was not supported by Encode until version 1.95 because of that.
 110  But Unicode::String, a module by Gisle Aas which adds Unicode supports
 111  to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
 112  so Encode can supersede Unicode::String 100%.
 113  
 114  =head1 In Practice
 115  
 116  When you want to encode Unicode for mails and web pages, however, do
 117  not use UTF-7 unless you are sure your recipients and readers can
 118  handle it.  Very few MUAs and WWW Browsers support these days (only
 119  Mozilla seems to support one).  For general cases, use UTF-8 for
 120  message body and MIME-Header for header instead.
 121  
 122  =head1 SEE ALSO
 123  
 124  L<Encode>, L<Encode::Unicode>, L<Unicode::String>
 125  
 126  RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
 127  
 128  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1