[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3master/usr/src/Encode-compat-0.05/blib/lib/Encode/compat/ -> Alias.pm (source)

   1  # $File: //member/autrijus/.vimrc $ $Author: autrijus $
   2  # $Revision: #1 $ $Change: 1649 $ $DateTime: 2002/10/24 15:21:23 $
   3  
   4  package Encode::compat::Alias;
   5  our $VERSION = '0.05';
   6  
   7  1;
   8  
   9  package Encode::Alias;
  10  use strict;
  11  our $VERSION = '0.05';
  12  our $DEBUG = 0;
  13  
  14  use base qw(Exporter);
  15  
  16  # Public, encouraged API is exported by default
  17  
  18  our @EXPORT = 
  19      qw (
  20      define_alias
  21      find_alias
  22      );
  23  
  24  our @Alias;  # ordered matching list
  25  our %Alias;  # cached known aliases
  26  
  27  sub find_alias
  28  {
  29      my $class = shift;
  30      local $_ = shift;
  31      unless (exists $Alias{$_})
  32      {
  33          $Alias{$_} = undef; # Recursion guard
  34      for (my $i=0; $i < @Alias; $i += 2)
  35      {
  36          my $alias = $Alias[$i];
  37          my $val   = $Alias[$i+1];
  38          my $new;
  39          if (ref($alias) eq 'Regexp' && $_ =~ $alias)
  40          {
  41          $DEBUG and warn "eval $val";
  42          $new = eval $val;
  43          # $@ and warn "$val, $@";
  44          }
  45          elsif (ref($alias) eq 'CODE')
  46          {
  47          $DEBUG and warn "$alias", "->", "($val)";
  48          $new = $alias->($val);
  49          }
  50          elsif (lc($_) eq lc($alias))
  51          {
  52          $new = $val;
  53          }
  54          if (defined($new))
  55          {
  56          next if $new eq $_; # avoid (direct) recursion on bugs
  57          $DEBUG and warn "$alias, $new";
  58          my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
  59          if ($enc)
  60          {
  61              $Alias{$_} = $enc;
  62              last;
  63          }
  64          }
  65      }
  66      }
  67      if ($DEBUG){
  68      my $name;
  69      if (my $e = $Alias{$_}){
  70          $name = $e->name;
  71      }else{
  72          $name = "";
  73      }
  74      warn "find_alias($class, $_)->name = $name";
  75      }
  76      return $Alias{$_};
  77  }
  78  
  79  sub define_alias
  80  {
  81      while (@_)
  82      {
  83      my ($alias,$name) = splice(@_,0,2);
  84      unshift(@Alias, $alias => $name);   # newer one has precedence
  85      # clear %Alias cache to allow overrides
  86      if (ref($alias)){
  87          my @a = keys %Alias;
  88          for my $k (@a){
  89          if (ref($alias) eq 'Regexp' && $k =~ $alias)
  90          {
  91              $DEBUG and warn "delete \$Alias\{$k\}";
  92              delete $Alias{$k};
  93          }
  94          elsif (ref($alias) eq 'CODE')
  95          {
  96              $DEBUG and warn "delete \$Alias\{$k\}";
  97              delete $Alias{$alias->($name)};
  98          }
  99          }
 100      }else{
 101          $DEBUG and warn "delete \$Alias\{$alias\}";
 102          delete $Alias{$alias};
 103      }
 104      }
 105  }
 106  
 107  # Allow latin-1 style names as well
 108                       # 0  1  2  3  4  5   6   7   8   9  10
 109  our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
 110  # Allow winlatin1 style names as well
 111  our %Winlatin2cp   = (
 112                'latin1'     => 1252,
 113                'latin2'     => 1250,
 114                'cyrillic'   => 1251,
 115                'greek'      => 1253,
 116                'turkish'    => 1254,
 117                'hebrew'     => 1255,
 118                'arabic'     => 1256,
 119                'baltic'     => 1257,
 120                'vietnamese' => 1258,
 121               );
 122  
 123  init_aliases();
 124  
 125  sub undef_aliases{
 126      @Alias = ();
 127      %Alias = ();
 128  }
 129  
 130  sub init_aliases
 131  {
 132      undef_aliases();
 133  
 134      # Try all-lower-case version should all else fails
 135      define_alias( qr/^(.*)$/ => '"\L$1"' );
 136  
 137      # UTF/UCS stuff
 138      define_alias( qr/^UCS-?2-?LE$/i       => '"UCS-2LE"' );
 139      define_alias( qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
 140                    qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
 141            qr/^iso-10646-1$/i      => '"UCS-2BE"' );
 142      define_alias( qr/^UTF(16|32)-?BE$/i   => '"UTF-$1BE"',
 143            qr/^UTF(16|32)-?LE$/i   => '"UTF-$1LE"',
 144            qr/^UTF(16|32)$/i       => '"UTF-$1"',
 145          );
 146      # ASCII
 147      define_alias(qr/^(?:US-?)ascii$/i => '"ascii"');
 148      define_alias('C' => 'ascii');
 149      define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"');
 150      # Allow variants of iso-8859-1 etc.
 151      define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
 152  
 153      # At least HP-UX has these.
 154      define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
 155  
 156      # More HP stuff.
 157      define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"$1}8"' );
 158  
 159      # The Official name of ASCII.
 160      define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
 161  
 162      # This is a font issue, not an encoding issue.
 163      # (The currency symbol of the Latin 1 upper half
 164      #  has been redefined as the euro symbol.)
 165      define_alias( qr/^(.+)\@euro$/i => '"$1"' );
 166  
 167      define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i 
 168            => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );
 169  
 170      define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
 171               hebrew|arabic|baltic|vietnamese)$/ix => 
 172            '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
 173  
 174      # Common names for non-latin prefered MIME names
 175      define_alias( 'ascii'    => 'US-ascii',
 176            'cyrillic' => 'iso-8859-5',
 177            'arabic'   => 'iso-8859-6',
 178            'greek'    => 'iso-8859-7',
 179            'hebrew'   => 'iso-8859-8',
 180            'thai'     => 'iso-8859-11',
 181            'tis620'   => 'iso-8859-11',
 182            );
 183  
 184      # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
 185      # And Microsoft has their own naming (again, surprisingly).
 186      # And windows-* is registered in IANA! 
 187      define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"');
 188  
 189      # Sometimes seen with a leading zero.
 190      # define_alias( qr/\bcp037\b/i => '"cp37"');
 191  
 192      # Mac Mappings
 193      # predefined in *.ucm; unneeded
 194      # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
 195      define_alias( qr/^mac_(.*)$/i => '"mac$1"');
 196      # Ououououou. gone.  They are differente!
 197      # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
 198    
 199      # Standardize on the dashed versions.
 200      # define_alias( qr/\butf8$/i  => 'utf-8' );
 201      define_alias( qr/\bkoi8r$/i => 'koi8-r' );
 202      define_alias( qr/\bkoi8u$/i => 'koi8-u' );
 203  
 204      unless ($Encode::ON_EBCDIC){
 205          # for Encode::CN
 206      define_alias( qr/\beuc.*cn$/i        => '"euc-cn"' );
 207      define_alias( qr/\bcn.*euc$/i        => '"euc-cn"' );
 208      # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
 209      # CP936 doesn't have vendor-addon for GBK, so they're identical.
 210      define_alias( qr/^gbk$/i => '"cp936"');
 211      # This fixes gb2312 vs. euc-cn confusion, practically
 212      define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' );
 213      # for Encode::JP
 214      define_alias( qr/\bjis$/i            => '"7bit-jis"' );
 215      define_alias( qr/\beuc.*jp$/i        => '"euc-jp"' );
 216      define_alias( qr/\bjp.*euc$/i        => '"euc-jp"' );
 217      define_alias( qr/\bujis$/i           => '"euc-jp"' );
 218      define_alias( qr/\bshift.*jis$/i     => '"shiftjis"' );
 219      define_alias( qr/\bsjis$/i           => '"shiftjis"' );
 220          # for Encode::KR
 221      define_alias( qr/\beuc.*kr$/i        => '"euc-kr"' );
 222      define_alias( qr/\bkr.*euc$/i        => '"euc-kr"' );
 223      # This fixes ksc5601 vs. euc-kr confusion, practically
 224          define_alias( qr/(?:x-)?uhc$/i            => '"cp949"' );
 225          define_alias( qr/(?:x-)?windows-949$/i    => '"cp949"' );
 226          define_alias( qr/\bks_c_5601-1987$/i      => '"cp949"' );
 227          # for Encode::TW
 228      define_alias( qr/\bbig-?5$/i          => '"big5-eten"' );
 229      define_alias( qr/\bbig5-?et(?:en)?$/i      => '"big5-eten"' );
 230      define_alias( qr/\btca[-_]?big5$/i      => '"big5-eten"' );
 231      define_alias( qr/\bbig5-?hk(?:scs)?$/i      => '"big5-hkscs"' );
 232      define_alias( qr/\bhk(?:scs)?[-_]?big5$/i  => '"big5-hkscs"' );
 233      }
 234      # utf8 is blessed :)
 235      define_alias( qr/^UTF-8$/i => '"utf8"',);
 236      # At last, Map white space and _ to '-'
 237      define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
 238  }
 239  
 240  1;
 241  __END__
 242  
 243  # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
 244  # TODO: HP-UX '15' encodings japanese15 korean15 roi15
 245  # TODO: Cyrillic encoding ISO-IR-111 (useful?)
 246  # TODO: Armenian encoding ARMSCII-8
 247  # TODO: Hebrew encoding ISO-8859-8-1
 248  # TODO: Thai encoding TCVN
 249  # TODO: Vietnamese encodings VPS
 250  # TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
 251  #       ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
 252  #       Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
 253  #       Kannada Khmer Korean Laotian Malayalam Mongolian
 254  #       Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
 255  
 256  =head1 NAME
 257  
 258  Encode::Alias - alias definitions to encodings
 259  
 260  =head1 SYNOPSIS
 261  
 262    use Encode;
 263    use Encode::Alias;
 264    define_alias( newName => ENCODING);
 265  
 266  =head1 DESCRIPTION
 267  
 268  Allows newName to be used as an alias for ENCODING. ENCODING may be
 269  either the name of an encoding or an encoding object (as described 
 270  in L<Encode>).
 271  
 272  Currently I<newName> can be specified in the following ways:
 273  
 274  =over 4
 275  
 276  =item As a simple string.
 277  
 278  =item As a qr// compiled regular expression, e.g.:
 279  
 280    define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
 281  
 282  In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
 283  in order to allow C<$1> etc. to be substituted.  The example is one
 284  way to alias names as used in X11 fonts to the MIME names for the
 285  iso-8859-* family.  Note the double quotes inside the single quotes.
 286  
 287  If you are using a regex here, you have to use the quotes as shown or
 288  it won't work.  Also note that regex handling is tricky even for the
 289  experienced.  Use it with caution.
 290  
 291  =item As a code reference, e.g.:
 292  
 293    define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
 294  
 295  In this case, C<$_> will be set to the name that is being looked up and
 296  I<ENCODING> is passed to the sub as its first argument.  The example
 297  is another way to alias names as used in X11 fonts to the MIME names
 298  for the iso-8859-* family.
 299  
 300  =back
 301  
 302  =head2 Alias overloading
 303  
 304  You can override predefined aliases by simply applying define_alias().
 305  The new alias is always evaluated first, and when neccessary,
 306  define_alias() flushes the internal cache to make the new definition
 307  available.
 308  
 309    # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
 310    # superset of SHIFT_JIS
 311  
 312    define_alias( qr/shift.*jis$/i  => '"cp932"' );
 313    define_alias( qr/sjis$/i        => '"cp932"' );
 314  
 315  If you want to zap all predefined aliases, you can use
 316  
 317    Encode::Alias->undef_aliases;
 318  
 319  to do so.  And
 320  
 321    Encode::Alias->init_aliases;
 322  
 323  gets the factory settings back.
 324  
 325  =head1 SEE ALSO
 326  
 327  L<Encode>, L<Encode::Supported>
 328  
 329  =cut
 330  


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1