[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/site_perl/5.10.0/i586-linux-thread-multi/XML/ -> Parser.pm (source)

   1  # XML::Parser
   2  #
   3  # Copyright (c) 1998-2000 Larry Wall and Clark Cooper
   4  # All rights reserved.
   5  #
   6  # This program is free software; you can redistribute it and/or
   7  # modify it under the same terms as Perl itself.
   8  
   9  package XML::Parser;
  10  
  11  use Carp;
  12  
  # Load the Expat backend at compile time, record this module's version and
  # refuse to continue when Parser.pm and Expat.pm report different versions.
  BEGIN {
    require XML::Parser::Expat;
    $VERSION = '2.36';
    if ($VERSION ne $XML::Parser::Expat::VERSION) {
      die "Parser.pm and Expat.pm versions don't match";
    }
  }
  19  
  20  use strict;
  21  
  22  use vars qw($VERSION $LWP_load_failed);
  23  
  24  $LWP_load_failed = 0;
  25  
  # Constructor: builds an XML::Parser object from keyword/value options.
  #
  # Options interpreted here:
  #   Style             - name of a style package.  A name without '::' is
  #                       capitalised and looked up as
  #                       XML::Parser::Style::<Name>; if that file cannot be
  #                       loaded, XML::Parser::<Name> is assumed instead.
  #   Handlers          - hash ref mapping handler type to code ref.  Entries
  #                       given here take precedence over Style handlers.
  #   Non_Expat_Options - hash ref of additional option names to be marked as
  #                       "not an Expat option".
  #   NoLWP             - true to install the file based external entity
  #                       handlers instead of the LWP bootstrap handler.
  #   Pkg               - defaults to the package of the caller.
  #
  # Returns the option hash blessed into $class.
  sub new {
    my ($class, %args) = @_;
    my $style = $args{Style};

    # Option names that belong to XML::Parser itself.
    my $nonexopt = $args{Non_Expat_Options} ||= {};
    $nonexopt->{$_} = 1
      for qw(Style Non_Expat_Options Handlers _HNDL_TYPES NoLWP);

    # Known handler types: whatever Expat can set, plus Init and Final.
    $args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters};
    $args{_HNDL_TYPES}->{Init}  = 1;
    $args{_HNDL_TYPES}->{Final} = 1;

    $args{Handlers} ||= {};
    my $handlers = $args{Handlers};

    if (defined($style)) {
      my $stylepkg = $style;

      if ($stylepkg !~ /::/) {
        $stylepkg = "\u$style";

        eval {
          my $fullpkg   = 'XML::Parser::Style::' . $stylepkg;
          my $stylefile = $fullpkg;
          $stylefile =~ s/::/\//g;
          require "$stylefile.pm";
          $stylepkg = $fullpkg;
        };
        if ($@) {
          # fallback to old behaviour
          $stylepkg = 'XML::Parser::' . $stylepkg;
        }
      }

      foreach my $htype (keys %{$args{_HNDL_TYPES}}) {
        # Handlers explicitly given override
        # handlers from the Style package
        next if defined($handlers->{$htype});

        # A handler in the style package must either have
        # exactly the right case as the type name or a
        # completely lower case version of it.
        #
        # The braces in "${stylepkg}::" delimit the variable name so that
        # the result is "<package>::<type>".
        foreach my $hname ("${stylepkg}::$htype", "${stylepkg}::\L$htype") {
          if (defined(&$hname)) {
            $handlers->{$htype} = \&$hname;
            last;
          }
        }
      }
    }

    unless (defined($handlers->{ExternEnt})
            or defined($handlers->{ExternEntFin})) {

      if ($args{NoLWP} or $LWP_load_failed) {
        $handlers->{ExternEnt}    = \&file_ext_ent_handler;
        $handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
      }
      else {
        # The following just bootstraps the real LWP external entity
        # handler

        $handlers->{ExternEnt} = \&initial_ext_ent_handler;

        # No cleanup function available until LWPExternEnt.pl loaded
      }
    }

    $args{Pkg} ||= caller;
    return bless \%args, $class;
  }                                # End of new
 109  
 # Register handlers given as a flat list of (type => handler) pairs.
 #
 # Croaks when the list has an odd number of elements or when a type is not
 # listed in the object's _HNDL_TYPES table.  Returns a flat list of
 # (type => previous handler) pairs in the order the types were given.
 sub setHandlers {
   my $self  = shift;
   my @pairs = @_;

   croak("Uneven number of arguments to setHandlers method")
     if @pairs % 2;

   my @previous;
   for (my $i = 0; $i < @pairs; $i += 2) {
     my ($type, $handler) = @pairs[$i, $i + 1];

     if (!defined($self->{_HNDL_TYPES}->{$type})) {
       my @types = sort keys %{$self->{_HNDL_TYPES}};

       croak("Unknown Parser handler type: $type\n Valid types: @types");
     }

     # Remember what was installed before replacing it.
     push @previous, $type, $self->{Handlers}->{$type};
     $self->{Handlers}->{$type} = $handler;
   }

   return @previous;
 }
 131  
 # Create an XML::Parser::ExpatNB object for incremental parsing.
 #
 # Every key of the parser object that is not listed in Non_Expat_Options is
 # passed to the ExpatNB constructor, followed by the options given in the
 # call.  All handlers except Init and Final are installed on the new
 # object; Init (if any) is invoked immediately with it, and Final (if any)
 # is stored in its FinalHandler slot.  Returns the ExpatNB object.
 sub parse_start {
   my ($self, @call_options) = @_;

   my @expat_options;
   foreach my $name (keys %{$self}) {
     next if exists $self->{Non_Expat_Options}->{$name};
     push @expat_options, $name, $self->{$name};
   }

   # Work on a copy so the parser's own handler table stays intact.
   my %handlers      = %{$self->{Handlers}};
   my $init_handler  = delete $handlers{Init};
   my $final_handler = delete $handlers{Final};

   my $expatnb = XML::Parser::ExpatNB->new(@expat_options, @call_options);
   $expatnb->setHandlers(%handlers);

   $init_handler->($expatnb) if defined $init_handler;

   $expatnb->{_State_} = 1;
   $expatnb->{FinalHandler} = $final_handler if defined $final_handler;

   return $expatnb;
 }
 159  
 # Parse one document with a fresh XML::Parser::Expat object.
 #
 # $arg is handed to the Expat object's parse method; any further arguments
 # are appended to the constructor options taken from the parser object
 # (every key not listed in Non_Expat_Options).
 #
 # If the Expat parse dies, the Expat object is released and the error is
 # re-thrown.  Otherwise, when the parse result is true and a Final handler
 # exists, the return value is whatever Final returns in the caller's
 # context; without a Final handler the scalar result of the Expat parse is
 # returned (an empty list in list context).
 sub parse {
   my ($self, $arg, @call_options) = @_;

   my @expat_options;
   foreach my $name (keys %{$self}) {
     next if exists $self->{Non_Expat_Options}->{$name};
     push @expat_options, $name, $self->{$name};
   }

   my $expat = XML::Parser::Expat->new(@expat_options, @call_options);

   # Init and Final are driven from here, not by Expat.
   my %handlers      = %{$self->{Handlers}};
   my $init_handler  = delete $handlers{Init};
   my $final_handler = delete $handlers{Final};

   $expat->setHandlers(%handlers);

   $expat->base($self->{Base}) if $self->{Base};

   $init_handler->($expat) if defined $init_handler;

   my $want_list = wantarray;
   my @result;
   my $result;

   eval { $result = $expat->parse($arg); };
   my $err = $@;
   if ($err) {
     $expat->release;
     die $err;
   }

   if ($result and defined $final_handler) {
     if ($want_list) {
       @result = $final_handler->($expat);
     }
     else {
       $result = $final_handler->($expat);
     }
   }

   $expat->release;

   return unless defined $want_list;
   return $want_list ? @result : $result;
 }
 209  
 # Backwards-compatible alias: forwards all arguments to the parse method
 # and returns whatever it returns.
 sub parsestring {
   my ($self, @args) = @_;
   return $self->parse(@args);
 }
 214  
 # Open $file for reading and parse it.
 #
 # Arguments: the file name, optionally followed by option pairs that are
 # passed on to the parse method.  The parser's Base attribute is set to the
 # file name before parsing.
 #
 # Croaks when the file cannot be opened.  The handle is closed whether or
 # not parse dies; an error raised by parse is re-thrown afterwards.
 # Returns what parse returns, in the caller's context.
 sub parsefile {
   my $self = shift;
   my $file = shift;
   local(*FILE);

   # Three-argument open: the name is always treated as a plain file to be
   # read.  With the two-argument form a name such as "cmd |" or ">file"
   # would start a command or truncate a file.
   open(FILE, '<', $file) or croak "Couldn't open $file:\n$!";
   binmode(FILE);
   my @ret;
   my $ret;

   $self->{Base} = $file;

   # Trap errors so that the handle is closed before they propagate.
   if (wantarray) {
     eval {
       @ret = $self->parse(*FILE, @_);
     };
   }
   else {
     eval {
       $ret = $self->parse(*FILE, @_);
     };
   }
   my $err = $@;
   close(FILE);
   die $err if $err;

   return unless defined wantarray;
   return wantarray ? @ret : $ret;
 }
 243  
 # Bootstrap ExternEnt handler.
 #
 # On first use it tries to load XML/Parser/LWPExternEnt.pl.  On success the
 # LWP based handlers are installed on the Expat object in $_[0] and control
 # is transferred to lwp_ext_ent_handler with the original arguments.  On
 # failure $LWP_load_failed is set, a warning is issued, and the file based
 # handlers are installed and used instead, as they are on every later call
 # once $LWP_load_failed is true.
 sub initial_ext_ent_handler {
   if (!$LWP_load_failed) {
     # Warnings are switched off while the helper file is being loaded.
     local $^W = 0;

     my $loaded = eval { require('XML/Parser/LWPExternEnt.pl'); };

     if ($loaded) {
       $_[0]->setHandlers(ExternEnt    => \&lwp_ext_ent_handler,
                          ExternEntFin => \&lwp_ext_ent_cleanup);

       goto &lwp_ext_ent_handler;
     }

     # Failed to load lwp handler, act as if NoLWP
     $LWP_load_failed = 1;

     warn(join '',
          "Couldn't load LWP based external entity handler\n",
          "Switching to file-based external entity handler\n",
          " (To avoid this message, use NoLWP option to XML::Parser)\n");
   }

   $_[0]->setHandlers(ExternEnt    => \&file_ext_ent_handler,
                      ExternEntFin => \&file_ext_ent_cleanup);
   goto &file_ext_ent_handler;
 }
 278  
 # File based ExternEnt handler.
 #
 # Arguments: the Expat object, the current base (may be undef) and the
 # system id of the entity.  A relative system id is resolved against the
 # base by replacing the base's last path component.
 #
 # Returns an open IO::File handle on success.  Returns undef, after
 # appending a message to the Expat object's ErrorMessage, when the path
 # starts with '|', '>' or '+' (after optional whitespace), ends with '|',
 # or cannot be opened.  On success the previous base and the handle are
 # pushed on the object's _BaseStack and _FhStack, and the base is set to
 # the opened path.
 sub file_ext_ent_handler {
   my ($xp, $base, $path) = @_;

   # Prepend base only for relative paths
   if (defined($base) and $path !~ m!^(?:[\\/]|\w+:)!) {
     my $resolved = $base;
     $resolved =~ s![^\\/:]*$!$path!;
     $path = $resolved;
   }

   # Refuse names that open() would treat as a pipe or an output mode.
   if ($path =~ /^\s*[|>+]/ or $path =~ /\|\s*$/) {
     $xp->{ErrorMessage}
       .= "System ID ($path) contains Perl IO control characters";
     return undef;
   }

   require IO::File;
   my $fh = IO::File->new($path);
   if (!defined $fh) {
     $xp->{ErrorMessage} .= "Failed to open $path:\n$!";
     return undef;
   }

   # Save the state that the matching cleanup handler restores.
   push @{ $xp->{_BaseStack} ||= [] }, $base;
   push @{ $xp->{_FhStack}   ||= [] }, $fh;

   $xp->base($path);

   return $fh;
 }
 317  
 # ExternEntFin counterpart of the file based ExternEnt handler: closes the
 # most recently pushed file handle and restores the base that was saved
 # alongside it.
 sub file_ext_ent_cleanup {
   my $xp = shift;

   my $entity_fh = pop @{ $xp->{_FhStack} };
   $entity_fh->close;

   return $xp->base(pop @{ $xp->{_BaseStack} });
 }
 327  
 328  1;
 329  
 330  __END__
 331  
 332  =head1 NAME
 333  
 334  XML::Parser - A perl module for parsing XML documents
 335  
 336  =head1 SYNOPSIS
 337  
 338    use XML::Parser;
 339    
 340    $p1 = new XML::Parser(Style => 'Debug');
 341    $p1->parsefile('REC-xml-19980210.xml');
 342    $p1->parse('<foo id="me">Hello World</foo>');
 343  
 344    # Alternative
 345    $p2 = new XML::Parser(Handlers => {Start => \&handle_start,
 346                                       End   => \&handle_end,
 347                                       Char  => \&handle_char});
 348    $p2->parse($socket);
 349  
 350    # Another alternative
 351    $p3 = new XML::Parser(ErrorContext => 2);
 352  
 353    $p3->setHandlers(Char    => \&text,
 354                     Default => \&other);
 355  
 356    open(FOO, 'xmlgenerator |');
 357    $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1');
 358    close(FOO);
 359  
 360    $p3->parsefile('junk.xml', ErrorContext => 3);
 361  
 362  =begin man
 363  .ds PI PI
 364  
 365  =end man
 366  
 367  =head1 DESCRIPTION
 368  
 369  This module provides ways to parse XML documents. It is built on top of
 370  L<XML::Parser::Expat>, which is a lower level interface to James Clark's
 371  expat library. Each call to one of the parsing methods creates a new
 372  instance of XML::Parser::Expat which is then used to parse the document.
 373  Expat options may be provided when the XML::Parser object is created.
 374  These options are then passed on to the Expat object on each parse call.
 375  They can also be given as extra arguments to the parse methods, in which
 376  case they override options given at XML::Parser creation time.
 377  
 378  The behavior of the parser is controlled either by the C<L</Style>> and/or
 379  C<L</Handlers>> options, or by the L</setHandlers> method. These all provide
 380  mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat.
 381  If neither C<Style> nor C<Handlers> are specified, then parsing just
 382  checks the document for being well-formed.
 383  
 384  When underlying handlers get called, they receive as their first parameter
 385  the I<Expat> object, not the Parser object.
 386  
 387  =head1 METHODS
 388  
 389  =over 4
 390  
 391  =item new
 392  
 393  This is a class method, the constructor for XML::Parser. Options are passed
 394  as keyword value pairs. Recognized options are:
 395  
 396  =over 4
 397  
 398  =item * Style
 399  
 400  This option provides an easy way to create a given style of parser. The
 401  built in styles are: L<"Debug">, L<"Subs">, L<"Tree">, L<"Objects">,
 402  and L<"Stream">. These are all defined in separate packages under
 403  C<XML::Parser::Style::*>, and you can find further documentation for
 404  each style both below, and in those packages.
 405  
 406  Custom styles can be provided by giving a full package name containing
 407  at least one '::'. This package should then have subs defined for each
 408  handler it wishes to have installed. See L<"STYLES"> below
 409  for a discussion of each built in style.
 410  
 411  =item * Handlers
 412  
 413  When provided, this option should be an anonymous hash containing as
 414  keys the type of handler and as values a sub reference to handle that
 415  type of event. All the handlers get passed as their 1st parameter the
 416  instance of expat that is parsing the document. Further details on
 417  handlers can be found in L<"HANDLERS">. Any handler set here
 418  overrides the corresponding handler set with the Style option.
 419  
 420  =item * Pkg
 421  
 422  Some styles will refer to subs defined in this package. If not provided,
 423  it defaults to the package which called the constructor.
 424  
 425  =item * ErrorContext
 426  
 427  This is an Expat option. When this option is defined, errors are reported
 428  in context. The value should be the number of lines to show on either side
 429  of the line in which the error occurred.
 430  
 431  =item * ProtocolEncoding
 432  
 433  This is an Expat option. This sets the protocol encoding name. It defaults
 434  to none. The built-in encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
 435  C<US-ASCII>. Other encodings may be used if they have encoding maps in one
 436  of the directories in the @Encoding_Path list. Check L<"ENCODINGS"> for
 437  more information on encoding maps. Setting the protocol encoding overrides
 438  any encoding in the XML declaration.
 439  
 440  =item * Namespaces
 441  
 442  This is an Expat option. If this is set to a true value, then namespace
 443  processing is done during the parse. See L<XML::Parser::Expat/"Namespaces">
 444  for further discussion of namespace processing.
 445  
 446  =item * NoExpand
 447  
 448  This is an Expat option. Normally, the parser will try to expand references
 449  to entities defined in the internal subset. If this option is set to a true
 450  value, and a default handler is also set, then the default handler will be
 451  called when an entity reference is seen in text. This has no effect if a
 452  default handler has not been registered, and it has no effect on the expansion
 453  of entity references inside attribute values.
 454  
 455  =item * Stream_Delimiter
 456  
 457  This is an Expat option. It takes a string value. When this string is found
 458  alone on a line while parsing from a stream, then the parse is ended as if it
 459  saw an end of file. The intended use is with a stream of xml documents in a
 460  MIME multipart format. The string should not contain a trailing newline.
 461  
 462  =item * ParseParamEnt
 463  
 464  This is an Expat option. Unless standalone is set to "yes" in the XML
 465  declaration, setting this to a true value allows the external DTD to be read,
 466  and parameter entities to be parsed and expanded.
 467  
 468  =item * NoLWP
 469  
 470  This option has no effect if the ExternEnt or ExternEntFin handlers are
 471  directly set. Otherwise, if true, it forces the use of a file based external
 472  entity handler.
 473  
 474  =item * Non_Expat_Options
 475  
 476  If provided, this should be an anonymous hash whose keys are options that
 477  shouldn't be passed to Expat. This should only be of concern to those
 478  subclassing XML::Parser.
 479  
 480  =back
 481  
 482  =item  setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
 483  
 484  This method registers handlers for various parser events. It overrides any
 485  previous handlers registered through the Style or Handlers options or through
 486  earlier calls to setHandlers. By providing a false or undefined value as
 487  the handler, the existing handler can be unset.
 488  
 489  This method returns a list of type, handler pairs corresponding to the
 490  input. The handlers returned are the ones that were in effect prior to
 491  the call.
 492  
 493  See a description of the handler types in L<"HANDLERS">.
 494  
 495  =item parse(SOURCE [, OPT => OPT_VALUE [...]])
 496  
 497  The SOURCE parameter should either be a string containing the whole XML
 498  document, or it should be an open IO::Handle. Constructor options to
 499  XML::Parser::Expat given as keyword-value pairs may follow the SOURCE
 500  parameter. These override, for this call, any options or attributes passed
 501  through from the XML::Parser instance.
 502  
 503  A die call is thrown if a parse error occurs. Otherwise it will return 1
 504  or whatever is returned from the B<Final> handler, if one is installed.
 505  In other words, what parse may return depends on the style.
 506  
 507  =item parsestring
 508  
 509  This is just an alias for parse for backwards compatibility.
 510  
 511  =item parsefile(FILE [, OPT => OPT_VALUE [...]])
 512  
 513  Open FILE for reading, then call parse with the open handle. The file
 514  is closed no matter how parse returns. Returns what parse returns.
 515  
 516  =item parse_start([ OPT => OPT_VALUE [...]])
 517  
 518  Create and return a new instance of XML::Parser::ExpatNB. Constructor
 519  options may be provided. If an init handler has been provided, it is
 520  called before returning the ExpatNB object. Documents are parsed by
 521  making incremental calls to the parse_more method of this object, which
 522  takes a string. A single call to the parse_done method of this object,
 523  which takes no arguments, indicates that the document is finished.
 524  
 525  If there is a final handler installed, it is executed by the parse_done
 526  method before returning and the parse_done method returns whatever is
 527  returned by the final handler.
 528  
 529  =back
 530  
 531  =head1 HANDLERS
 532  
 533  Expat is an event based parser. As the parser recognizes parts of the
 534  document (say the start or end tag for an XML element), then any handlers
 535  registered for that type of an event are called with suitable parameters.
 536  All handlers receive an instance of XML::Parser::Expat as their first
 537  argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the
 538  methods that can be called on this object.
 539  
 540  =head2 Init                (Expat)
 541  
 542  This is called just before the parsing of the document starts.
 543  
 544  =head2 Final                (Expat)
 545  
 546  This is called just after parsing has finished, but only if no errors
 547  occurred during the parse. Parse returns what this returns.
 548  
 549  =head2 Start                (Expat, Element [, Attr, Val [,...]])
 550  
 551  This event is generated when an XML start tag is recognized. Element is the
 552  name of the XML element type that is opened with the start tag. The Attr &
 553  Val pairs are generated for each attribute in the start tag.
 554  
 555  =head2 End                (Expat, Element)
 556  
 557  This event is generated when an XML end tag is recognized. Note that
 558  an XML empty tag (<foo/>) generates both a start and an end event.
 559  
 560  =head2 Char                (Expat, String)
 561  
 562  This event is generated when non-markup is recognized. The non-markup
 563  sequence of characters is in String. A single non-markup sequence of
 564  characters may generate multiple calls to this handler. Whatever the
 565  encoding of the string in the original document, this is given to the
 566  handler in UTF-8.
 567  
 568  =head2 Proc                (Expat, Target, Data)
 569  
 570  This event is generated when a processing instruction is recognized.
 571  
 572  =head2 Comment                (Expat, Data)
 573  
 574  This event is generated when a comment is recognized.
 575  
 576  =head2 CdataStart        (Expat)
 577  
 578  This is called at the start of a CDATA section.
 579  
 580  =head2 CdataEnd                (Expat)
 581  
 582  This is called at the end of a CDATA section.
 583  
 584  =head2 Default                (Expat, String)
 585  
 586  This is called for any characters that don't have a registered handler.
 587  This includes both characters that are part of markup for which no
 588  events are generated (markup declarations) and characters that
 589  could generate events, but for which no handler has been registered.
 590  
 591  Whatever the encoding in the original document, the string is returned to
 592  the handler in UTF-8.
 593  
 594  =head2 Unparsed                (Expat, Entity, Base, Sysid, Pubid, Notation)
 595  
 596  This is called for a declaration of an unparsed entity. Entity is the name
 597  of the entity. Base is the base to be used for resolving a relative URI.
 598  Sysid is the system id. Pubid is the public id. Notation is the notation
 599  name. Base and Pubid may be undefined.
 600  
 601  =head2 Notation                (Expat, Notation, Base, Sysid, Pubid)
 602  
 603  This is called for a declaration of notation. Notation is the notation name.
 604  Base is the base to be used for resolving a relative URI. Sysid is the system
 605  id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
 606  
 607  =head2 ExternEnt        (Expat, Base, Sysid, Pubid)
 608  
 609  This is called when an external entity is referenced. Base is the base to be
 610  used for resolving a relative URI. Sysid is the system id. Pubid is the public
 611  id. Base and Pubid may be undefined.
 612  
 613  This handler should either return a string, which represents the contents of
 614  the external entity, or return an open filehandle that can be read to obtain
 615  the contents of the external entity, or return undef, which indicates the
 616  external entity couldn't be found and will generate a parse error.
 617  
 618  If an open filehandle is returned, it must be returned as either a glob
 619  (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
 620  
 621  A default handler is installed for this event. The default handler is
 622  XML::Parser::lwp_ext_ent_handler, unless the NoLWP option was provided with
 623  a true value, in which case XML::Parser::file_ext_ent_handler is the default
 624  handler for external entities. Even without the NoLWP option, if the
 625  URI or LWP modules are missing, the file based handler ends up being used
 626  after giving a warning on the first external entity reference.
 627  
 628  The LWP external entity handler will use proxies defined in the environment
 629  (http_proxy, ftp_proxy, etc.).
 630  
 631  Please note that the LWP external entity handler reads the entire
 632  entity into a string and returns it, whereas the file handler opens a
 633  filehandle.
 634  
 635  Also note that the file external entity handler will likely choke on
 636  absolute URIs or file names that don't fit the conventions of the local
 637  operating system.
 638  
 639  The expat base method can be used to set a basename for
 640  relative pathnames. If no basename is given, or if the basename is itself
 641  a relative name, then it is relative to the current working directory.
 642  
 643  =head2 ExternEntFin        (Expat)
 644  
 645  This is called after parsing an external entity. It's not called unless
 646  an ExternEnt handler is also set. There is a default handler installed
 647  that pairs with the default ExternEnt handler.
 648  
 649  If you're going to install your own ExternEnt handler, then you should
 650  set (or unset) this handler too.
 651  
 652  =head2 Entity                (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)
 653  
 654  This is called when an entity is declared. For internal entities, the Val
 655  parameter will contain the value and the remaining three parameters will be
 656  undefined. For external entities, the Val parameter will be undefined, the
 657  Sysid parameter will have the system id, the Pubid parameter will have the
 658  public id if it was provided (it will be undefined otherwise), the Ndata
 659  parameter will contain the notation for unparsed entities. If this is a
 660  parameter entity declaration, then the IsParam parameter is true.
 661  
 662  Note that this handler and the Unparsed handler above overlap. If both are
 663  set, then this handler will not be called for unparsed entities.
 664  
 665  =head2 Element                (Expat, Name, Model)
 666  
 667  The element handler is called when an element declaration is found. Name
 668  is the element name, and Model is the content model as an
 669  XML::Parser::ContentModel object. See L<XML::Parser::Expat/"XML::Parser::ContentModel Methods">
 670  for methods available for this class.
 671  
 672  =head2 Attlist                (Expat, Elname, Attname, Type, Default, Fixed)
 673  
 674  This handler is called for each attribute in an ATTLIST declaration.
 675  So an ATTLIST declaration that has multiple attributes will generate multiple
 676  calls to this handler. The Elname parameter is the name of the element with
 677  which the attribute is being associated. The Attname parameter is the name
 678  of the attribute. Type is the attribute type, given as a string. Default is
 679  the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted
 680  string (i.e. the returned string will begin and end with a quote character).
 681  If Fixed is true, then this is a fixed attribute.
 682  
 683  =head2 Doctype                (Expat, Name, Sysid, Pubid, Internal)
 684  
 685  This handler is called for DOCTYPE declarations. Name is the document type
 686  name. Sysid is the system id of the document type, if it was provided,
 687  otherwise it's undefined. Pubid is the public id of the document type,
 688  which will be undefined if no public id was given. Internal is the internal
 689  subset, given as a string. If there was no internal subset, it will be
 690  undefined. Internal will contain all whitespace, comments, processing
 691  instructions, and declarations seen in the internal subset. The declarations
 692  will be there whether or not they have been processed by another handler
 693  (except for unparsed entities processed by the Unparsed handler). However,
 694  comments and processing instructions will not appear if they've been processed
 695  by their respective handlers.
 696  
 697  =head2 DoctypeFin                (Expat)
 698  
 699  This handler is called after parsing of the DOCTYPE declaration has finished,
 700  including any internal or external DTD declarations.
 701  
 702  =head2 XMLDecl                (Expat, Version, Encoding, Standalone)
 703  
 704  This handler is called for XML declarations. Version is a string containing
 705  the version. Encoding is either undefined or contains an encoding string.
 706  Standalone will be either true, false, or undefined if the standalone attribute
 707  is yes, no, or not given, respectively.
 708  
 709  =head1 STYLES
 710  
 711  =head2 Debug
 712  
 713  This just prints out the document in outline form. Nothing special is
 714  returned by parse.
 715  
 716  =head2 Subs
 717  
 718  Each time an element starts, a sub by that name in the package specified
 719  by the Pkg option is called with the same parameters that the Start
 720  handler gets called with.
 721  
 722  Each time an element ends, a sub with that name appended with an underscore
 723  ("_"), is called with the same parameters that the End handler gets called
 724  with.
 725  
 726  Nothing special is returned by parse.
 727  
 728  =head2 Tree
 729  
 730  Parse will return a parse tree for the document. Each node in the tree
 731  takes the form of a tag, content pair. Text nodes are represented with
 732  a pseudo-tag of "0" and the string that is their content. For elements,
 733  the content is an array reference. The first item in the array is a
 734  (possibly empty) hash reference containing attributes. The remainder of
 735  the array is a sequence of tag-content pairs representing the content
 736  of the element.
 737  
 738  So for example the result of parsing:
 739  
 740    <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
 741  
 742  would be:
 743  
 744               Tag   Content
 745    ==================================================================
 746    [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]],
 747                bar, [         {}, 0, "Howdy",  ref, [{}]],
 748                  0, "do"
 749          ]
 750    ]
 751  
 752  The root element "foo" has 3 children: a "head" element, a "bar"
 753  element and the text "do". After the empty attribute hash, these are
 754  represented in its contents by 3 tag-content pairs.
 755  
 756  =head2 Objects
 757  
 758  This is similar to the Tree style, except that a hash object is created for
 759  each element. The corresponding object will be in the class whose name
 760  is created by appending "::" and the element name to the package set with
 761  the Pkg option. Non-markup text will be in the ::Characters class. The
 762  contents of the corresponding object will be in an anonymous array that
 763  is the value of the Kids property for that object.
 764  
 765  =head2 Stream
 766  
 767  This style also uses the Pkg package. If none of the subs that this
 768  style looks for is there, then the effect of parsing with this style is
 769  to print a canonical copy of the document without comments or declarations.
 770  All the subs receive as their 1st parameter the Expat instance for the
 771  document they're parsing.
 772  
 773  It looks for the following routines:
 774  
 775  =over 4
 776  
 777  =item * StartDocument
 778  
 779  Called at the start of the parse.
 780  
 781  =item * StartTag
 782  
 783  Called for every start tag with a second parameter of the element type. The $_
 784  variable will contain a copy of the tag and the %_ variable will contain
 785  attribute values supplied for that element.
 786  
 787  =item * EndTag
 788  
 789  Called for every end tag with a second parameter of the element type. The $_
 790  variable will contain a copy of the end tag.
 791  
 792  =item * Text
 793  
 794  Called just before start or end tags with accumulated non-markup text in
 795  the $_ variable.
 796  
 797  =item * PI
 798  
 799  Called for processing instructions. The $_ variable will contain a copy of
 800  the PI and the target and data are sent as 2nd and 3rd parameters
 801  respectively.
 802  
 803  =item * EndDocument
 804  
 805  Called at conclusion of the parse.
 806  
 807  =back
 808  
 809  =head1 ENCODINGS
 810  
 811  XML documents may be encoded in character sets other than Unicode as
 812  long as they may be mapped into the Unicode character set. Expat has
 813  further restrictions on encodings. Read the xmlparse.h header file in
 814  the expat distribution to see details on these restrictions.
 815  
 816  Expat has built-in encodings for: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and
 817  C<US-ASCII>. Encodings are set either through the XML declaration
 818  encoding attribute or through the ProtocolEncoding option to XML::Parser
 819  or XML::Parser::Expat.
 820  
 821  For encodings other than the built-ins, expat calls the function
 822  load_encoding in the Expat package with the encoding name. This function
 823  looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that
 824  matches the lower-cased name with a '.enc' extension. The first one it
 825  finds, it loads.
 826  
 827  If you wish to build your own encoding maps, check out the XML::Encoding
 828  module from CPAN.
 829  
 830  =head1 AUTHORS
 831  
 832  Larry Wall <F<larry@wall.org>> wrote version 1.0.
 833  
 834  Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API
 835  for this version (2.x), provided documentation,
 836  and added some standard package features.
 837  
 838  Matt Sergeant <F<matt@sergeant.org>> is now maintaining XML::Parser.
 839  
 840  =cut


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1