[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/Encode/MIME/ -> Header.pm (source)

   1  package Encode::MIME::Header;
   2  use strict;
   3  use warnings;
   4  no warnings 'redefine';
   5  
   6  our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
   7  use Encode qw(find_encoding encode_utf8 decode_utf8);
   8  use MIME::Base64;
   9  use Carp;
  10  
  # Default attributes shared by every encoding object registered below.
  my %seed = (
      decode_b => '1',    # decodes 'B' encoding ?
      decode_q => '1',    # decodes 'Q' encoding ?
      encode   => 'B',    # encode with 'B' or 'Q' ?
      bpl      => 75,     # bytes per line
  );

  # MIME-Header: decodes both 'B' and 'Q' encoded words, encodes with 'B'.
  $Encode::Encoding{'MIME-Header'} =
    bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;

  # MIME-B: decodes 'B' only (decode() croaks on 'Q'), encodes with 'B'.
  $Encode::Encoding{'MIME-B'} = bless {
      %seed,
      decode_q => 0,
      Name     => 'MIME-B',
  } => __PACKAGE__;

  # MIME-Q: decodes 'Q' only, encodes with 'Q'.  decode_b has to be turned
  # off explicitly because %seed enables it; otherwise 'B' words would be
  # decoded instead of making decode() croak as the module's POD promises.
  $Encode::Encoding{'MIME-Q'} = bless {
      %seed,
      decode_b => 0,
      encode   => 'Q',
      Name     => 'MIME-Q',
  } => __PACKAGE__;
  33  
  34  use base qw(Encode::Encoding);
  35  
  # Always returns 1.  Under the Encode::Encoding convention a true value
  # means the encoding needs line-buffered input.
  sub needs_lines {
      return 1;
  }
  # Always returns 0.  Under the Encode::Encoding convention a false value
  # means the encoding cannot be used as a PerlIO layer.
  sub perlio_ok {
      return 0;
  }
  38  
  # decode($obj, $str [, $chk])
  #
  # Decodes the RFC 2047 encoded words (=?charset?B?...?= and
  # =?charset?Q?...?=) found in $str and returns the resulting string.
  # Text outside encoded words is passed through.
  #
  #   $obj - encoding object; its decode_b / decode_q flags say which of
  #          the 'B' and 'Q' schemes may be decoded
  #   $str - header text to decode
  #   $chk - when true, the caller's input variable ($_[1]) is set to the
  #          empty string once decoding is done
  #
  # Croaks when an encoded word uses a scheme that $obj has switched off.
  sub decode($$;$) {
      use utf8;
      my ( $obj, $str, $chk ) = @_;

      # zap spaces between encoded words
      $str =~ s/\?=\s+=\?/\?==\?/gos;

      # multi-line header to single line: remove every line break that is
      # followed by a space or tab, together with that character.  The
      # group is non-capturing "(?:"; the former "(:?" also swallowed a
      # literal colon in front of a bare CR.  CRLF is tried first so that
      # it is consumed as one line break.
      $str =~ s/(?:\r\n|\r|\n)[ \t]//gos;

      1 while ( $str =~
          s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
        ;    # Concat consecutive QP encoded mime headers
             # Fixes breaking inside multi-byte characters

      # Replace each encoded word with its decoded text.
      $str =~ s{
          =\?                  # begin encoded word
          ([0-9A-Za-z\-_]+) # charset (encoding)
                  (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
          \?([QqBb])\?     # delimiter
          (.*?)            # encoded text
          \?=              # end encoded word
          }{
          if    (uc($2) eq 'B'){
              $obj->{decode_b} or croak qq(MIME "B" unsupported);
              decode_b($1, $3);
          }elsif(uc($2) eq 'Q'){
              $obj->{decode_q} or croak qq(MIME "Q" unsupported);
              decode_q($1, $3);
          }else{
              croak qq(MIME "$2" encoding is nonexistent!);
          }
          }egox;
      $_[1] = '' if $chk;
      return $str;
  }
  75  
  # decode_b($charset, $base64_text)
  #
  # Base64-decodes $base64_text and converts the resulting octets from
  # $charset into a Perl string.  Croaks when Encode does not know
  # $charset.
  sub decode_b {
      my ( $charset, $payload ) = @_;

      my $codec = find_encoding($charset)
        or croak qq(Unknown encoding "$charset");
      my $octets = decode_base64($payload);

      # The encoding named 'utf8' goes through decode_utf8(); every other
      # encoding decodes with the FB_PERLQQ fallback.
      if ( $codec->name eq 'utf8' ) {
          return Encode::decode_utf8($octets);
      }
      return $codec->decode( $octets, Encode::FB_PERLQQ );
  }
  84  
  # decode_q($charset, $q_text)
  #
  # Undoes the 'Q' encoding of $q_text ("_" stands for a space, "=XX" for
  # the octet with hex value XX) and converts the resulting octets from
  # $charset into a Perl string.  Croaks when Encode does not know
  # $charset.
  sub decode_q {
      my ( $charset, $text ) = @_;

      my $codec = find_encoding($charset)
        or croak qq(Unknown encoding "$charset");

      $text =~ s/_/ /go;
      $text =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;

      # The encoding named 'utf8' goes through decode_utf8(); every other
      # encoding decodes with the FB_PERLQQ fallback.
      if ( $codec->name eq 'utf8' ) {
          return Encode::decode_utf8($text);
      }
      return $codec->decode( $text, Encode::FB_PERLQQ );
  }
  94  
  # Alternation of the quoted "especial" characters; encode() splits a
  # line on them.
  my $especials =
    join( '|' => map { quotemeta( chr($_) ) }
        unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );

  # Matches one complete encoded word without capturing any of its parts.
  my $re_encoded_word = qr{
         (?:
      =\?               # begin encoded word
      (?:[0-9A-Za-z\-_]+) # charset (encoding)
          (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
      \?(?:[QqBb])\?      # delimiter
      (?:.*?)             # encoded text
      \?=                 # end encoded word
         )
        }xo;

  # Word separator used by encode(): an encoded word or a single especial.
  my $re_especials = qr{$re_encoded_word|$especials}xo;

  # encode($obj, $str [, $chk])
  #
  # Encodes $str as a MIME header value and returns it.  Every input line
  # is split into words at especials and existing encoded words.  Words
  # that contain non-ASCII characters, or that already look like an
  # encoded word, are passed to $obj->_encode; everything else is copied
  # verbatim.  The words are then packed into sublines of at most
  # $obj->{bpl} bytes, which are joined with "\n " (folded continuation
  # lines).
  #
  #   $obj - encoding object providing bpl and the _encode method
  #   $str - text to encode; lines may end in CRLF, CR or LF
  #   $chk - when true, the caller's input variable ($_[1]) is set to the
  #          empty string once encoding is done
  sub encode($$;$) {
      my ( $obj, $str, $chk ) = @_;
      my @line = ();

      # CRLF has to be tried before the single characters; otherwise one
      # CRLF is split twice and yields a spurious empty line.
      for my $line ( split /\r\n|[\r\n]/, $str ) {
          my ( @word, @subline );
          for my $word ( split /($re_especials)/o, $line ) {
              if (   $word =~ /[^\x00-\x7f]/o
                  or $word =~ /^$re_encoded_word$/o )
              {
                  push @word, $obj->_encode($word);
              }
              else {
                  push @word, $word;
              }
          }
          my $subline = '';
          for my $word (@word) {
              use bytes ();

              # Start a new subline when the word does not fit any more.
              # Nothing is flushed while the subline is still empty, so a
              # single over-long word cannot produce a blank first line.
              if ( length($subline)
                  and bytes::length($subline) + bytes::length($word) >
                  $obj->{bpl} )
              {
                  push @subline, $subline;
                  $subline = '';
              }
              $subline .= $word;
          }

          # Test the length, not the truth value: a subline of "0" is
          # false but still has to be emitted.
          length($subline) and push @subline, $subline;
          push @line, join( "\n " => @subline );
      }
      $_[1] = '' if $chk;
      return join( "\n", @line );
  }
 144  
 # Fixed parts of every encoded word produced by this module, and the
 # dispatch table from the scheme letter to its chunk encoder.
 use constant HEAD   => '=?UTF-8?';
 use constant TAIL   => '?=';
 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };

 # _encode($o, $str)
 #
 # Cuts $str into chunks that fit the per-line budget and returns the
 # list of encoded words, one per chunk, built with the scheme chosen by
 # $o->{encode} ('B' or 'Q').  Returns an empty list for an empty $str.
 sub _encode {
     my ( $o, $str ) = @_;
     my $enc = $o->{encode};

     # Room left for the payload once HEAD, the two-character "B?" / "Q?"
     # marker and TAIL are subtracted from the line length.
     my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );

     # to coerce a floating-point arithmetics, the following contains
     # .0 in numbers -- dankogai
     # 'B' turns 3 input bytes into 4 characters; 'Q' may need 3
     # characters ("=XX") per input byte.
     $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
     my @result = ();
     my $chunk  = '';

     # Move one character at a time from $str to $chunk, so that a chunk
     # never ends inside a multi-byte character.
     while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
         use bytes ();
         if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
             push @result, SINGLE->{$enc}($chunk);
             $chunk = '';
         }
         $chunk .= $chr;
     }

     # Test the length, not the truth value: a final chunk of "0" is
     # false but still has to be encoded.
     length($chunk) and push @result, SINGLE->{$enc}($chunk);
     return @result;
 }

 # _encode_b($chunk): returns $chunk as one UTF-8 'B' (Base64) encoded
 # word.
 sub _encode_b {
     HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
 }

 # _encode_q($chunk): returns $chunk as one UTF-8 'Q' encoded word.  Every
 # octet other than an ASCII letter or digit is written as "=XX".
 sub _encode_q {
     my $chunk = shift;
     $chunk = encode_utf8($chunk);
     $chunk =~ s{
         ([^0-9A-Za-z])
            }{
            join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
            }egox;
     return HEAD . 'Q?' . $chunk . TAIL;
 }
 185  
 186  1;
 187  __END__
 188  
 189  =head1 NAME
 190  
 191  Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
 192  
 193  =head1 SYNOPSIS
 194  
 195      use Encode qw/encode decode/; 
 196      $utf8   = decode('MIME-Header', $header);
 197      $header = encode('MIME-Header', $utf8);
 198  
 199  =head1 ABSTRACT
 200  
 201  This module implements RFC 2047 MIME Header Encoding.  There are 3
 202  variant encoding names: C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
 203  differences are described below.
 204  
 205                decode()          encode()
 206    ----------------------------------------------
 207    MIME-Header Both B and Q      =?UTF-8?B?....?=
 208    MIME-B      B only; Q croaks  =?UTF-8?B?....?=
 209    MIME-Q      Q only; B croaks  =?UTF-8?Q?....?=
 210  
 211  =head1 DESCRIPTION
 212  
 213  When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
 214  is extracted and decoded for I<X> encoding (B for Base64, Q for
 215  Quoted-Printable). Then the decoded chunk is fed to
 216  decode(I<encoding>).  So long as I<encoding> is supported by Encode,
 217  any source encoding is fine.
 218  
 219  When you encode, it just encodes the UTF-8 string with I<X> encoding and
 220  then wraps it as =?UTF-8?I<X>?....?= .  The parts that RFC 2047 forbids to
 221  encode are left as is and long lines are folded within 76 bytes per
 222  line.
 223  
 224  =head1 BUGS
 225  
 226  It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
 227  and =?ISO-8859-1?, but that makes the implementation too complicated.
 228  These days major mail agents all support =?UTF-8? so I think it is
 229  just good enough.
 230  
 231  Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
 232  Makamaka.  There are still too many MUAs, especially cellular phone
 233  handsets, which do not grok UTF-8.
 234  
 235  =head1 SEE ALSO
 236  
 237  L<Encode>
 238  
 239  RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
 240  locations. 
 241  
 242  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1