[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/ -> re.pm (source)

   1  package re;
   2  
   3  # pragma for controlling the regex engine
   4  use strict;
   5  use warnings;
   6  
   7  our $VERSION     = "0.08";
   8  our @ISA         = qw(Exporter);
   9  our @EXPORT_OK   = qw(is_regexp regexp_pattern regmust 
  10                        regname regnames regnames_count);
  11  our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;
  12  
  13  # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
  14  #
  15  # If you modify these values see comment below!
  16  
  17  my %bitmask = (
  18      taint   => 0x00100000, # HINT_RE_TAINT
  19      eval    => 0x00200000, # HINT_RE_EVAL
  20  );
  21  
  22  # - File::Basename contains a literal for 'taint' as a fallback.  If
  23  # taint is changed here, File::Basename must be updated as well.
  24  #
  25  # - ExtUtils::ParseXS uses a hardcoded 
  26  # BEGIN { $^H |= 0x00200000 } 
  27  # in it to allow re.xs to be built. So if 'eval' is changed here then
  28  # ExtUtils::ParseXS must be changed as well.
  29  #
  30  # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
  31  
  32  sub setcolor {
  33   eval {                # Ignore errors
  34    require Term::Cap;
  35  
  36    my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
  37    my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
  38    my @props = split /,/, $props;
  39    my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
  40  
  41    $colors =~ s/\0//g;
  42    $ENV{PERL_RE_COLORS} = $colors;
  43   };
  44   if ($@) {
  45      $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
  46   }
  47  
  48  }
  49  
  50  my %flags = (
  51      COMPILE         => 0x0000FF,
  52      PARSE           => 0x000001,
  53      OPTIMISE        => 0x000002,
  54      TRIEC           => 0x000004,
  55      DUMP            => 0x000008,
  56      FLAGS           => 0x000010,
  57  
  58      EXECUTE         => 0x00FF00,
  59      INTUIT          => 0x000100,
  60      MATCH           => 0x000200,
  61      TRIEE           => 0x000400,
  62  
  63      EXTRA           => 0xFF0000,
  64      TRIEM           => 0x010000,
  65      OFFSETS         => 0x020000,
  66      OFFSETSDBG      => 0x040000,
  67      STATE           => 0x080000,
  68      OPTIMISEM       => 0x100000,
  69      STACK           => 0x280000,
  70      BUFFERS         => 0x400000,
  71  );
  72  $flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
  73  $flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
  74  $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
  75  $flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
  76  $flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
  77  $flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
  78  
  79  my $installed;
  80  my $installed_error;
  81  
  82  sub _do_install {
  83      if ( ! defined($installed) ) {
  84          require XSLoader;
  85          $installed = eval { XSLoader::load('re', $VERSION) } || 0;
  86          $installed_error = $@;
  87      }
  88  }
  89  
  90  sub _load_unload {
  91      my ($on)= @_;
  92      if ($on) {
  93          _do_install();        
  94          if ( ! $installed ) {
  95              die "'re' not installed!? ($installed_error)";
  96      } else {
  97          # We call install() every time, as if we didn't, we wouldn't
  98          # "see" any changes to the color environment var since
  99          # the last time it was called.
 100  
 101          # install() returns an integer, which if casted properly
 102          # in C resolves to a structure containing the regex
 103          # hooks. Setting it to a random integer will guarantee
 104          # segfaults.
 105          $^H{regcomp} = install();
 106          }
 107      } else {
 108          delete $^H{regcomp};
 109      }
 110  }
 111  
 112  sub bits {
 113      my $on = shift;
 114      my $bits = 0;
 115      unless (@_) {
 116      require Carp;
 117      Carp::carp("Useless use of \"re\" pragma"); 
 118      }
 119      foreach my $idx (0..$#_){
 120          my $s=$_[$idx];
 121          if ($s eq 'Debug' or $s eq 'Debugcolor') {
 122              setcolor() if $s =~/color/i;
 123              ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
 124              for my $idx ($idx+1..$#_) {
 125                  if ($flags{$_[$idx]}) {
 126                      if ($on) {
 127                          ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
 128                      } else {
 129                          ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
 130                      }
 131                  } else {
 132                      require Carp;
 133                      Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
 134                                 join(", ",sort keys %flags ) );
 135                  }
 136              }
 137              _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
 138              last;
 139          } elsif ($s eq 'debug' or $s eq 'debugcolor') {
 140          setcolor() if $s =~/color/i;
 141          _load_unload($on);
 142          last;
 143          } elsif (exists $bitmask{$s}) {
 144          $bits |= $bitmask{$s};
 145      } elsif ($EXPORT_OK{$s}) {
 146          _do_install();
 147          require Exporter;
 148          re->export_to_level(2, 're', $s);
 149      } else {
 150          require Carp;
 151          Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
 152                         join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
 153                         ")");
 154      }
 155      }
 156      $bits;
 157  }
 158  
 159  sub import {
 160      shift;
 161      $^H |= bits(1, @_);
 162  }
 163  
 164  sub unimport {
 165      shift;
 166      $^H &= ~ bits(0, @_);
 167  }
 168  
 169  1;
 170  
 171  __END__
 172  
 173  =head1 NAME
 174  
 175  re - Perl pragma to alter regular expression behaviour
 176  
 177  =head1 SYNOPSIS
 178  
 179      use re 'taint';
 180      ($x) = ($^X =~ /^(.*)$/s);     # $x is tainted here
 181  
 182      $pat = '(?{ $foo = 1 })';
 183      use re 'eval';
 184      /foo$pat}bar/;           # won't fail (when not under -T switch)
 185  
 186      {
 187      no re 'taint';           # the default
 188      ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
 189  
 190      no re 'eval';           # the default
 191      /foo$pat}bar/;           # disallowed (with or without -T switch)
 192      }
 193  
 194      use re 'debug';           # output debugging info during
 195      /^(.*)$/s;               #     compile and run time
 196  
 197  
 198      use re 'debugcolor';       # same as 'debug', but with colored output
 199      ...
 200  
 201      use re qw(Debug All);          # Finer tuned debugging options.
 202      use re qw(Debug More);
 203      no re qw(Debug ALL);           # Turn of all re debugging in this scope
 204  
 205      use re qw(is_regexp regexp_pattern); # import utility functions
 206      my ($pat,$mods)=regexp_pattern(qr/foo/i);
 207      if (is_regexp($obj)) { 
 208          print "Got regexp: ",
 209              scalar regexp_pattern($obj); # just as perl would stringify it
 210      }                                    # but no hassle with blessed re's.
 211  
 212  (We use $^X in these examples because it's tainted by default.)
 213  
 214  =head1 DESCRIPTION
 215  
 216  =head2 'taint' mode
 217  
 218  When C<use re 'taint'> is in effect, and a tainted string is the target
 219  of a regex, the regex memories (or values returned by the m// operator
 220  in list context) are tainted.  This feature is useful when regex operations
 221  on tainted data aren't meant to extract safe substrings, but to perform
 222  other transformations.
 223  
 224  =head2 'eval' mode
 225  
 226  When C<use re 'eval'> is in effect, a regex is allowed to contain
 227  C<(?{ ... })> zero-width assertions even if regular expression contains
 228  variable interpolation.  That is normally disallowed, since it is a
 229  potential security risk.  Note that this pragma is ignored when the regular
 230  expression is obtained from tainted data, i.e.  evaluation is always
 231  disallowed with tainted regular expressions.  See L<perlre/(?{ code })>.
 232  
 233  For the purpose of this pragma, interpolation of precompiled regular
 234  expressions (i.e., the result of C<qr//>) is I<not> considered variable
 235  interpolation.  Thus:
 236  
 237      /foo$pat}bar/
 238  
 239  I<is> allowed if $pat is a precompiled regular expression, even
 240  if $pat contains C<(?{ ... })> assertions.
 241  
 242  =head2 'debug' mode
 243  
 244  When C<use re 'debug'> is in effect, perl emits debugging messages when
 245  compiling and using regular expressions.  The output is the same as that
 246  obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
 247  B<-Dr> switch. It may be quite voluminous depending on the complexity
 248  of the match.  Using C<debugcolor> instead of C<debug> enables a
 249  form of output that can be used to get a colorful display on terminals
 250  that understand termcap color sequences.  Set C<$ENV{PERL_RE_TC}> to a
 251  comma-separated list of C<termcap> properties to use for highlighting
 252  strings on/off, pre-point part on/off.
 253  See L<perldebug/"Debugging regular expressions"> for additional info.
 254  
 255  As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
 256  lexically scoped, as the other directives are.  However they have both 
 257  compile-time and run-time effects.
 258  
 259  See L<perlmodlib/Pragmatic Modules>.
 260  
 261  =head2 'Debug' mode
 262  
 263  Similarly C<use re 'Debug'> produces debugging output, the difference
 264  being that it allows the fine tuning of what debugging output will be
 265  emitted. Options are divided into three groups, those related to
 266  compilation, those related to execution and those related to special
 267  purposes. The options are as follows:
 268  
 269  =over 4
 270  
 271  =item Compile related options
 272  
 273  =over 4
 274  
 275  =item COMPILE
 276  
 277  Turns on all compile related debug options.
 278  
 279  =item PARSE
 280  
 281  Turns on debug output related to the process of parsing the pattern.
 282  
 283  =item OPTIMISE
 284  
 285  Enables output related to the optimisation phase of compilation.
 286  
 287  =item TRIEC
 288  
 289  Detailed info about trie compilation.
 290  
 291  =item DUMP
 292  
 293  Dump the final program out after it is compiled and optimised.
 294  
 295  =back
 296  
 297  =item Execute related options
 298  
 299  =over 4
 300  
 301  =item EXECUTE
 302  
 303  Turns on all execute related debug options.
 304  
 305  =item MATCH
 306  
 307  Turns on debugging of the main matching loop.
 308  
 309  =item TRIEE
 310  
 311  Extra debugging of how tries execute.
 312  
 313  =item INTUIT
 314  
 315  Enable debugging of start point optimisations.
 316  
 317  =back
 318  
 319  =item Extra debugging options
 320  
 321  =over 4
 322  
 323  =item EXTRA
 324  
 325  Turns on all "extra" debugging options.
 326  
 327  =item BUFFERS
 328  
 329  Enable debugging the capture buffer storage during match. Warning,
 330  this can potentially produce extremely large output.
 331  
 332  =item TRIEM
 333  
 334  Enable enhanced TRIE debugging. Enhances both TRIEE
 335  and TRIEC.
 336  
 337  =item STATE
 338  
 339  Enable debugging of states in the engine.
 340  
 341  =item STACK
 342  
 343  Enable debugging of the recursion stack in the engine. Enabling
 344  or disabling this option automatically does the same for debugging
 345  states as well. This output from this can be quite large.
 346  
 347  =item OPTIMISEM
 348  
 349  Enable enhanced optimisation debugging and start point optimisations.
 350  Probably not useful except when debugging the regex engine itself.
 351  
 352  =item OFFSETS
 353  
 354  Dump offset information. This can be used to see how regops correlate
 355  to the pattern. Output format is
 356  
 357     NODENUM:POSITION[LENGTH]
 358  
 359  Where 1 is the position of the first char in the string. Note that position
 360  can be 0, or larger than the actual length of the pattern, likewise length
 361  can be zero.
 362  
 363  =item OFFSETSDBG
 364  
 365  Enable debugging of offsets information. This emits copious
 366  amounts of trace information and doesn't mesh well with other
 367  debug options.
 368  
 369  Almost definitely only useful to people hacking
 370  on the offsets part of the debug engine.
 371  
 372  =back
 373  
 374  =item Other useful flags
 375  
 376  These are useful shortcuts to save on the typing.
 377  
 378  =over 4
 379  
 380  =item ALL
 381  
 382  Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS
 383  
 384  =item All
 385  
 386  Enable DUMP and all execute options. Equivalent to:
 387  
 388    use re 'debug';
 389  
 390  =item MORE
 391  
 392  =item More
 393  
 394  Enable TRIEM and all execute compile and execute options.
 395  
 396  =back
 397  
 398  =back
 399  
 400  As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
 401  lexically scoped, as the other directives are.  However they have both
 402  compile-time and run-time effects.
 403  
 404  =head2 Exportable Functions
 405  
 406  As of perl 5.9.5 're' debug contains a number of utility functions that
 407  may be optionally exported into the caller's namespace. They are listed
 408  below.
 409  
 410  =over 4
 411  
 412  =item is_regexp($ref)
 413  
 414  Returns true if the argument is a compiled regular expression as returned
 415  by C<qr//>, false if it is not.
 416  
 417  This function will not be confused by overloading or blessing. In
 418  internals terms, this extracts the regexp pointer out of the
 419  PERL_MAGIC_qr structure so it it cannot be fooled.
 420  
 421  =item regexp_pattern($ref)
 422  
 423  If the argument is a compiled regular expression as returned by C<qr//>,
 424  then this function returns the pattern.
 425  
 426  In list context it returns a two element list, the first element
 427  containing the pattern and the second containing the modifiers used when
 428  the pattern was compiled.
 429  
 430    my ($pat, $mods) = regexp_pattern($ref);
 431  
 432  In scalar context it returns the same as perl would when strigifying a raw
 433  C<qr//> with the same pattern inside.  If the argument is not a compiled
 434  reference then this routine returns false but defined in scalar context,
 435  and the empty list in list context. Thus the following
 436  
 437      if (regexp_pattern($ref) eq '(?i-xsm:foo)')
 438  
 439  will be warning free regardless of what $ref actually is.
 440  
 441  Like C<is_regexp> this function will not be confused by overloading
 442  or blessing of the object.
 443  
 444  =item regmust($ref)
 445  
 446  If the argument is a compiled regular expression as returned by C<qr//>,
 447  then this function returns what the optimiser consiers to be the longest
 448  anchored fixed string and longest floating fixed string in the pattern.
 449  
 450  A I<fixed string> is defined as being a substring that must appear for the
 451  pattern to match. An I<anchored fixed string> is a fixed string that must
 452  appear at a particular offset from the beginning of the match. A I<floating
 453  fixed string> is defined as a fixed string that can appear at any point in
 454  a range of positions relative to the start of the match. For example,
 455  
 456      my $qr = qr/here .* there/x;
 457      my ($anchored, $floating) = regmust($qr);
 458      print "anchored:'$anchored'\nfloating:'$floating'\n";
 459  
 460  results in
 461  
 462      anchored:'here'
 463      floating:'there'
 464  
 465  Because the C<here> is before the C<.*> in the pattern, its position
 466  can be determined exactly. That's not true, however, for the C<there>;
 467  it could appear at any point after where the anchored string appeared.
 468  Perl uses both for its optimisations, prefering the longer, or, if they are
 469  equal, the floating.
 470  
 471  B<NOTE:> This may not necessarily be the definitive longest anchored and
 472  floating string. This will be what the optimiser of the Perl that you
 473  are using thinks is the longest. If you believe that the result is wrong
 474  please report it via the L<perlbug> utility.
 475  
 476  =item regname($name,$all)
 477  
 478  Returns the contents of a named buffer of the last successful match. If
 479  $all is true, then returns an array ref containing one entry per buffer,
 480  otherwise returns the first defined buffer.
 481  
 482  =item regnames($all)
 483  
 484  Returns a list of all of the named buffers defined in the last successful
 485  match. If $all is true, then it returns all names defined, if not it returns
 486  only names which were involved in the match.
 487  
 488  =item regnames_count()
 489  
 490  Returns the number of distinct names defined in the pattern used
 491  for the last successful match.
 492  
 493  B<Note:> this result is always the actual number of distinct
 494  named buffers defined, it may not actually match that which is
 495  returned by C<regnames()> and related routines when those routines
 496  have not been called with the $all parameter set.
 497  
 498  =back
 499  
 500  =head1 SEE ALSO
 501  
 502  L<perlmodlib/Pragmatic Modules>.
 503  
 504  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1