[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/File/ -> Fetch.pm (source)

   1  package File::Fetch;
   2  
   3  use strict;
   4  use FileHandle;
   5  use File::Copy;
   6  use File::Spec;
   7  use File::Spec::Unix;
   8  use File::Basename              qw[dirname];
   9  
  10  use Cwd                         qw[cwd];
  11  use Carp                        qw[carp];
  12  use IPC::Cmd                    qw[can_run run];
  13  use File::Path                  qw[mkpath];
  14  use Params::Check               qw[check];
  15  use Module::Load::Conditional   qw[can_load];
  16  use Locale::Maketext::Simple    Style => 'gettext';
  17  
  18  use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
  19                  $BLACKLIST $METHOD_FAIL $VERSION $METHODS
  20                  $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
  21              ];
  22  
  23  use constant QUOTE  => do { $^O eq 'MSWin32' ? q["] : q['] };            
  24              
  25  
  26  $VERSION        = '0.14';
  27  $VERSION        = eval $VERSION;    # avoid warnings with development releases
  28  $PREFER_BIN     = 0;                # XXX TODO implement
  29  $FROM_EMAIL     = 'File-Fetch@example.com';
  30  $USER_AGENT     = 'File::Fetch/$VERSION';
  31  $BLACKLIST      = [qw|ftp|];
  32  $METHOD_FAIL    = { };
  33  $FTP_PASSIVE    = 1;
  34  $TIMEOUT        = 0;
  35  $DEBUG          = 0;
  36  $WARN           = 1;
  37  
  38  ### methods available to fetch the file depending on the scheme
  39  $METHODS = {
  40      http    => [ qw|lwp wget curl lynx| ],
  41      ftp     => [ qw|lwp netftp wget curl ncftp ftp| ],
  42      file    => [ qw|lwp file| ],
  43      rsync   => [ qw|rsync| ]
  44  };
  45  
  46  ### silly warnings ###
  47  local $Params::Check::VERBOSE               = 1;
  48  local $Params::Check::VERBOSE               = 1;
  49  local $Module::Load::Conditional::VERBOSE   = 0;
  50  local $Module::Load::Conditional::VERBOSE   = 0;
  51  
  52  ### see what OS we are on, important for file:// uris ###
  53  use constant ON_WIN         => ($^O eq 'MSWin32');
  54  use constant ON_VMS         => ($^O eq 'VMS');                                
  55  use constant ON_UNIX        => (!ON_WIN);
  56  use constant HAS_VOL        => (ON_WIN);
  57  use constant HAS_SHARE      => (ON_WIN);
  58  =pod
  59  
  60  =head1 NAME
  61  
  62  File::Fetch - A generic file fetching mechanism
  63  
  64  =head1 SYNOPSIS
  65  
  66      use File::Fetch;
  67  
  68      ### build a File::Fetch object ###
  69      my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
  70  
  71      ### fetch the uri to cwd() ###
  72      my $where = $ff->fetch() or die $ff->error;
  73  
  74      ### fetch the uri to /tmp ###
  75      my $where = $ff->fetch( to => '/tmp' );
  76  
  77      ### parsed bits from the uri ###
  78      $ff->uri;
  79      $ff->scheme;
  80      $ff->host;
  81      $ff->path;
  82      $ff->file;
  83  
  84  =head1 DESCRIPTION
  85  
  86  File::Fetch is a generic file fetching mechanism.
  87  
  88  It allows you to fetch any file pointed to by a C<ftp>, C<http>,
  89  C<file>, or C<rsync> uri by a number of different means.
  90  
  91  See the C<HOW IT WORKS> section further down for details.
  92  
  93  =head1 ACCESSORS
  94  
  95  A C<File::Fetch> object has the following accessors
  96  
  97  =over 4
  98  
  99  =item $ff->uri
 100  
 101  The uri you passed to the constructor
 102  
 103  =item $ff->scheme
 104  
 105  The scheme from the uri (like 'file', 'http', etc)
 106  
 107  =item $ff->host
 108  
 109  The hostname in the uri.  Will be empty if host was originally 
 110  'localhost' for a 'file://' url.
 111  
 112  =item $ff->vol
 113  
 114  On operating systems with the concept of a volume the second element
 115  of a file:// is considered to the be volume specification for the file.
 116  Thus on Win32 this routine returns the volume, on other operating
 117  systems this returns nothing.
 118  
 119  On Windows this value may be empty if the uri is to a network share, in 
 120  which case the 'share' property will be defined. Additionally, volume 
 121  specifications that use '|' as ':' will be converted on read to use ':'.
 122  
 123  On VMS, which has a volume concept, this field will be empty because VMS
 124  file specifications are converted to absolute UNIX format and the volume
 125  information is transparently included.
 126  
 127  =item $ff->share
 128  
 129  On systems with the concept of a network share (currently only Windows) returns 
 130  the sharename from a file://// url.  On other operating systems returns empty.
 131  
 132  =item $ff->path
 133  
 134  The path from the uri, will be at least a single '/'.
 135  
 136  =item $ff->file
 137  
 138  The name of the remote file. For the local file name, the
 139  result of $ff->output_file will be used. 
 140  
 141  =cut
 142  
 143  
 144  ##########################
 145  ### Object & Accessors ###
 146  ##########################
 147  
 148  {
 149      ### template for new() and autogenerated accessors ###
 150      my $Tmpl = {
 151          scheme          => { default => 'http' },
 152          host            => { default => 'localhost' },
 153          path            => { default => '/' },
 154          file            => { required => 1 },
 155          uri             => { required => 1 },
 156          vol             => { default => '' }, # windows for file:// uris
 157          share           => { default => '' }, # windows for file:// uris
 158          _error_msg      => { no_override => 1 },
 159          _error_msg_long => { no_override => 1 },
 160      };
 161      
 162      for my $method ( keys %$Tmpl ) {
 163          no strict 'refs';
 164          *$method = sub {
 165                          my $self = shift;
 166                          $self->{$method} = $_[0] if @_;
 167                          return $self->{$method};
 168                      }
 169      }
 170      
 171      sub _create {
 172          my $class = shift;
 173          my %hash  = @_;
 174          
 175          my $args = check( $Tmpl, \%hash ) or return;
 176          
 177          bless $args, $class;
 178      
 179          if( lc($args->scheme) ne 'file' and not $args->host ) {
 180              return File::Fetch->_error(loc(
 181                  "Hostname required when fetching from '%1'",$args->scheme));
 182          }
 183          
 184          for (qw[path file]) {
 185              unless( $args->$_() ) { # 5.5.x needs the ()
 186                  return File::Fetch->_error(loc("No '%1' specified",$_));
 187              }
 188          }
 189          
 190          return $args;
 191      }    
 192  }
 193  
 194  =item $ff->output_file
 195  
 196  The name of the output file. This is the same as $ff->file,
 197  but any query parameters are stripped off. For example:
 198  
 199      http://example.com/index.html?x=y
 200  
 201  would make the output file be C<index.html> rather than 
 202  C<index.html?x=y>.
 203  
 204  =back
 205  
 206  =cut
 207  
 208  sub output_file {
 209      my $self = shift;
 210      my $file = $self->file;
 211      
 212      $file =~ s/\?.*$//g;
 213      
 214      return $file;
 215  }
 216  
 217  ### XXX do this or just point to URI::Escape?
 218  # =head2 $esc_uri = $ff->escaped_uri
 219  # 
 220  # =cut
 221  # 
 222  # ### most of this is stolen straight from URI::escape
 223  # {   ### Build a char->hex map
 224  #     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
 225  # 
 226  #     sub escaped_uri {
 227  #         my $self = shift;
 228  #         my $uri  = $self->uri;
 229  # 
 230  #         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
 231  #         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
 232  #                     $escapes{$1} || $self->_fail_hi($1)/ge;
 233  # 
 234  #         return $uri;
 235  #     }
 236  # 
 237  #     sub _fail_hi {
 238  #         my $self = shift;
 239  #         my $char = shift;
 240  #         
 241  #         $self->_error(loc(
 242  #             "Can't escape '%1', try using the '%2' module instead", 
 243  #             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
 244  #         ));            
 245  #     }
 246  # 
 247  #     sub output_file {
 248  #     
 249  #     }
 250  #     
 251  #     
 252  # }
 253  
 254  =head1 METHODS
 255  
 256  =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
 257  
 258  Parses the uri and creates a corresponding File::Fetch::Item object,
 259  that is ready to be C<fetch>ed and returns it.
 260  
 261  Returns false on failure.
 262  
 263  =cut
 264  
 265  sub new {
 266      my $class = shift;
 267      my %hash  = @_;
 268  
 269      my ($uri);
 270      my $tmpl = {
 271          uri => { required => 1, store => \$uri },
 272      };
 273  
 274      check( $tmpl, \%hash ) or return;
 275  
 276      ### parse the uri to usable parts ###
 277      my $href    = __PACKAGE__->_parse_uri( $uri ) or return;
 278  
 279      ### make it into a FFI object ###
 280      my $ff      = File::Fetch->_create( %$href ) or return;
 281  
 282  
 283      ### return the object ###
 284      return $ff;
 285  }
 286  
 287  ### parses an uri to a hash structure:
 288  ###
 289  ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
 290  ###
 291  ### becomes:
 292  ###
 293  ### $href = {
 294  ###     scheme  => 'ftp',
 295  ###     host    => 'ftp.cpan.org',
 296  ###     path    => '/pub/mirror',
 297  ###     file    => 'index.html'
 298  ### };
 299  ###
 300  ### In the case of file:// urls there maybe be additional fields
 301  ###
 302  ### For systems with volume specifications such as Win32 there will be 
 303  ### a volume specifier provided in the 'vol' field.
 304  ###
 305  ###   'vol' => 'volumename'
 306  ###
 307  ### For windows file shares there may be a 'share' key specified
 308  ###
 309  ###   'share' => 'sharename' 
 310  ###
 311  ### Note that the rules of what a file:// url means vary by the operating system 
 312  ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
 313  ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 
 314  ### not '/foo/bar.txt'
 315  ###
 316  ### Similarly if the host interpreting the url is VMS then 
 317  ### file:///disk$user/my/notes/note12345.txt' means 
 318  ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
 319  ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
 320  ### Except for some cases in the File::Spec methods, Perl on VMS will generally
 321  ### handle UNIX format file specifications.
 322  ###
 323  ### This means it is impossible to serve certain file:// urls on certain systems.
 324  ###
 325  ### Thus are the problems with a protocol-less specification. :-(
 326  ###
 327  
 328  sub _parse_uri {
 329      my $self = shift;
 330      my $uri  = shift or return;
 331  
 332      my $href = { uri => $uri };
 333  
 334      ### find the scheme ###
 335      $uri            =~ s|^(\w+)://||;
 336      $href->{scheme} = $1;
 337  
 338      ### See rfc 1738 section 3.10
 339      ### http://www.faqs.org/rfcs/rfc1738.html
 340      ### And wikipedia for more on windows file:// urls
 341      ### http://en.wikipedia.org/wiki/File://
 342      if( $href->{scheme} eq 'file' ) {
 343          
 344          my @parts = split '/',$uri;
 345  
 346          ### file://hostname/...
 347          ### file://hostname/...
 348          ### normalize file://localhost with file:///
 349          $href->{host} = $parts[0] || '';
 350  
 351          ### index in @parts where the path components begin;
 352          my $index = 1;  
 353  
 354          ### file:////hostname/sharename/blah.txt        
 355          if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
 356              
 357              $href->{host}   = $parts[2] || '';  # avoid warnings
 358              $href->{share}  = $parts[3] || '';  # avoid warnings        
 359  
 360              $index          = 4         # index after the share
 361  
 362          ### file:///D|/blah.txt
 363          ### file:///D:/blah.txt
 364          } elsif (HAS_VOL) {
 365          
 366              ### this code comes from dmq's patch, but:
 367              ### XXX if volume is empty, wouldn't that be an error? --kane
 368              ### if so, our file://localhost test needs to be fixed as wel            
 369              $href->{vol}    = $parts[1] || '';
 370  
 371              ### correct D| style colume descriptors
 372              $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
 373  
 374              $index          = 2;        # index after the volume
 375          } 
 376  
 377          ### rebuild the path from the leftover parts;
 378          $href->{path} = join '/', '', splice( @parts, $index, $#parts );
 379  
 380      } else {
 381          ### using anything but qw() in hash slices may produce warnings 
 382          ### in older perls :-(
 383          @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
 384      }
 385  
 386      ### split the path into file + dir ###
 387      {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
 388          $href->{path} = $parts[1];
 389          $href->{file} = $parts[2];
 390      }
 391  
 392      ### host will be empty if the target was 'localhost' and the 
 393      ### scheme was 'file'
 394      $href->{host} = '' if   ($href->{host}      eq 'localhost') and
 395                              ($href->{scheme}    eq 'file');
 396  
 397      return $href;
 398  }
 399  
 400  =head2 $ff->fetch( [to => /my/output/dir/] )
 401  
 402  Fetches the file you requested. By default it writes to C<cwd()>,
 403  but you can override that by specifying the C<to> argument.
 404  
 405  Returns the full path to the downloaded file on success, and false
 406  on failure.
 407  
 408  =cut
 409  
 410  sub fetch {
 411      my $self = shift or return;
 412      my %hash = @_;
 413  
 414      my $to;
 415      my $tmpl = {
 416          to  => { default => cwd(), store => \$to },
 417      };
 418  
 419      check( $tmpl, \%hash ) or return;
 420  
 421      ### On VMS force to VMS format so File::Spec will work.
 422      $to = VMS::Filespec::vmspath($to) if ON_VMS;
 423  
 424      ### create the path if it doesn't exist yet ###
 425      unless( -d $to ) {
 426          eval { mkpath( $to ) };
 427  
 428          return $self->_error(loc("Could not create path '%1'",$to)) if $@;
 429      }
 430  
 431      ### set passive ftp if required ###
 432      local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
 433  
 434      ### we dont use catfile on win32 because if we are using a cygwin tool
 435      ### under cmd.exe they wont understand windows style separators.
 436      my $out_to = ON_WIN ? $to.'/'.$self->output_file 
 437                          : File::Spec->catfile( $to, $self->output_file );
 438      
 439      for my $method ( @{ $METHODS->{$self->scheme} } ) {
 440          my $sub =  '_'.$method.'_fetch';
 441  
 442          unless( __PACKAGE__->can($sub) ) {
 443              $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
 444                          $method));
 445              next;
 446          }
 447  
 448          ### method is blacklisted ###
 449          next if grep { lc $_ eq $method } @$BLACKLIST;
 450  
 451          ### method is known to fail ###
 452          next if $METHOD_FAIL->{$method};
 453  
 454          ### there's serious issues with IPC::Run and quoting of command
 455          ### line arguments. using quotes in the wrong place breaks things,
 456          ### and in the case of say, 
 457          ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
 458          ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
 459          ### it doesn't matter how you quote, it always fails.
 460          local $IPC::Cmd::USE_IPC_RUN = 0;
 461          
 462          if( my $file = $self->$sub( 
 463                          to => $out_to
 464          )){
 465  
 466              unless( -e $file && -s _ ) {
 467                  $self->_error(loc("'%1' said it fetched '%2', ".
 468                       "but it was not created",$method,$file));
 469  
 470                  ### mark the failure ###
 471                  $METHOD_FAIL->{$method} = 1;
 472  
 473                  next;
 474  
 475              } else {
 476  
 477                  my $abs = File::Spec->rel2abs( $file );
 478                  return $abs;
 479              }
 480          }
 481      }
 482  
 483  
 484      ### if we got here, we looped over all methods, but we weren't able
 485      ### to fetch it.
 486      return;
 487  }
 488  
 489  ########################
 490  ### _*_fetch methods ###
 491  ########################
 492  
 493  ### LWP fetching ###
 494  sub _lwp_fetch {
 495      my $self = shift;
 496      my %hash = @_;
 497  
 498      my ($to);
 499      my $tmpl = {
 500          to  => { required => 1, store => \$to }
 501      };
 502      check( $tmpl, \%hash ) or return;
 503  
 504      ### modules required to download with lwp ###
 505      my $use_list = {
 506          LWP                 => '0.0',
 507          'LWP::UserAgent'    => '0.0',
 508          'HTTP::Request'     => '0.0',
 509          'HTTP::Status'      => '0.0',
 510          URI                 => '0.0',
 511  
 512      };
 513  
 514      if( can_load(modules => $use_list) ) {
 515  
 516          ### setup the uri object
 517          my $uri = URI->new( File::Spec::Unix->catfile(
 518                                      $self->path, $self->file
 519                          ) );
 520  
 521          ### special rules apply for file:// uris ###
 522          $uri->scheme( $self->scheme );
 523          $uri->host( $self->scheme eq 'file' ? '' : $self->host );
 524          $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
 525  
 526          ### set up the useragent object
 527          my $ua = LWP::UserAgent->new();
 528          $ua->timeout( $TIMEOUT ) if $TIMEOUT;
 529          $ua->agent( $USER_AGENT );
 530          $ua->from( $FROM_EMAIL );
 531          $ua->env_proxy;
 532  
 533          my $res = $ua->mirror($uri, $to) or return;
 534  
 535          ### uptodate or fetched ok ###
 536          if ( $res->code == 304 or $res->code == 200 ) {
 537              return $to;
 538  
 539          } else {
 540              return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
 541                          $res->code, HTTP::Status::status_message($res->code),
 542                          $res->status_line));
 543          }
 544  
 545      } else {
 546          $METHOD_FAIL->{'lwp'} = 1;
 547          return;
 548      }
 549  }
 550  
 551  ### Net::FTP fetching
 552  sub _netftp_fetch {
 553      my $self = shift;
 554      my %hash = @_;
 555  
 556      my ($to);
 557      my $tmpl = {
 558          to  => { required => 1, store => \$to }
 559      };
 560      check( $tmpl, \%hash ) or return;
 561  
 562      ### required modules ###
 563      my $use_list = { 'Net::FTP' => 0 };
 564  
 565      if( can_load( modules => $use_list ) ) {
 566  
 567          ### make connection ###
 568          my $ftp;
 569          my @options = ($self->host);
 570          push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
 571          unless( $ftp = Net::FTP->new( @options ) ) {
 572              return $self->_error(loc("Ftp creation failed: %1",$@));
 573          }
 574  
 575          ### login ###
 576          unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
 577              return $self->_error(loc("Could not login to '%1'",$self->host));
 578          }
 579  
 580          ### set binary mode, just in case ###
 581          $ftp->binary;
 582  
 583          ### create the remote path 
 584          ### remember remote paths are unix paths! [#11483]
 585          my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
 586  
 587          ### fetch the file ###
 588          my $target;
 589          unless( $target = $ftp->get( $remote, $to ) ) {
 590              return $self->_error(loc("Could not fetch '%1' from '%2'",
 591                          $remote, $self->host));
 592          }
 593  
 594          ### log out ###
 595          $ftp->quit;
 596  
 597          return $target;
 598  
 599      } else {
 600          $METHOD_FAIL->{'netftp'} = 1;
 601          return;
 602      }
 603  }
 604  
 605  ### /bin/wget fetch ###
 606  sub _wget_fetch {
 607      my $self = shift;
 608      my %hash = @_;
 609  
 610      my ($to);
 611      my $tmpl = {
 612          to  => { required => 1, store => \$to }
 613      };
 614      check( $tmpl, \%hash ) or return;
 615  
 616      ### see if we have a wget binary ###
 617      if( my $wget = can_run('wget') ) {
 618  
 619          ### no verboseness, thanks ###
 620          my $cmd = [ $wget, '--quiet' ];
 621  
 622          ### if a timeout is set, add it ###
 623          push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
 624  
 625          ### run passive if specified ###
 626          push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
 627  
 628          ### set the output document, add the uri ###
 629          push @$cmd, '--output-document', 
 630                      ### DO NOT quote things for IPC::Run, it breaks stuff.
 631                      $IPC::Cmd::USE_IPC_RUN
 632                          ? ($to, $self->uri)
 633                          : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
 634  
 635          ### shell out ###
 636          my $captured;
 637          unless(run( command => $cmd, 
 638                      buffer  => \$captured, 
 639                      verbose => $DEBUG  
 640          )) {
 641              ### wget creates the output document always, even if the fetch
 642              ### fails.. so unlink it in that case
 643              1 while unlink $to;
 644              
 645              return $self->_error(loc( "Command failed: %1", $captured || '' ));
 646          }
 647  
 648          return $to;
 649  
 650      } else {
 651          $METHOD_FAIL->{'wget'} = 1;
 652          return;
 653      }
 654  }
 655  
 656  
 657  ### /bin/ftp fetch ###
 658  sub _ftp_fetch {
 659      my $self = shift;
 660      my %hash = @_;
 661  
 662      my ($to);
 663      my $tmpl = {
 664          to  => { required => 1, store => \$to }
 665      };
 666      check( $tmpl, \%hash ) or return;
 667  
 668      ### see if we have a ftp binary ###
 669      if( my $ftp = can_run('ftp') ) {
 670  
 671          my $fh = FileHandle->new;
 672  
 673          local $SIG{CHLD} = 'IGNORE';
 674  
 675          unless ($fh->open("|$ftp -n")) {
 676              return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
 677          }
 678  
 679          my @dialog = (
 680              "lcd " . dirname($to),
 681              "open " . $self->host,
 682              "user anonymous $FROM_EMAIL",
 683              "cd /",
 684              "cd " . $self->path,
 685              "binary",
 686              "get " . $self->file . " " . $self->output_file,
 687              "quit",
 688          );
 689  
 690          foreach (@dialog) { $fh->print($_, "\n") }
 691          $fh->close or return;
 692  
 693          return $to;
 694      }
 695  }
 696  
 697  ### lynx is stupid - it decompresses any .gz file it finds to be text
 698  ### use /bin/lynx to fetch files
 699  sub _lynx_fetch {
 700      my $self = shift;
 701      my %hash = @_;
 702  
 703      my ($to);
 704      my $tmpl = {
 705          to  => { required => 1, store => \$to }
 706      };
 707      check( $tmpl, \%hash ) or return;
 708  
 709      ### see if we have a lynx binary ###
 710      if( my $lynx = can_run('lynx') ) {
 711  
 712          unless( IPC::Cmd->can_capture_buffer ) {
 713              $METHOD_FAIL->{'lynx'} = 1;
 714  
 715              return $self->_error(loc( 
 716                  "Can not capture buffers. Can not use '%1' to fetch files",
 717                  'lynx' ));
 718          }            
 719  
 720          ### write to the output file ourselves, since lynx ass_u_mes to much
 721          my $local = FileHandle->new(">$to")
 722                          or return $self->_error(loc(
 723                              "Could not open '%1' for writing: %2",$to,$!));
 724  
 725          ### dump to stdout ###
 726          my $cmd = [
 727              $lynx,
 728              '-source',
 729              "-auth=anonymous:$FROM_EMAIL",
 730          ];
 731  
 732          push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
 733  
 734          ### DO NOT quote things for IPC::Run, it breaks stuff.
 735          push @$cmd, $IPC::Cmd::USE_IPC_RUN
 736                          ? $self->uri
 737                          : QUOTE. $self->uri .QUOTE;
 738  
 739  
 740          ### shell out ###
 741          my $captured;
 742          unless(run( command => $cmd,
 743                      buffer  => \$captured,
 744                      verbose => $DEBUG )
 745          ) {
 746              return $self->_error(loc("Command failed: %1", $captured || ''));
 747          }
 748  
 749          ### print to local file ###
 750          ### XXX on a 404 with a special error page, $captured will actually
 751          ### hold the contents of that page, and make it *appear* like the
 752          ### request was a success, when really it wasn't :(
 753          ### there doesn't seem to be an option for lynx to change the exit
 754          ### code based on a 4XX status or so.
 755          ### the closest we can come is using --error_file and parsing that,
 756          ### which is very unreliable ;(
 757          $local->print( $captured );
 758          $local->close or return;
 759  
 760          return $to;
 761  
 762      } else {
 763          $METHOD_FAIL->{'lynx'} = 1;
 764          return;
 765      }
 766  }
 767  
 768  ### use /bin/ncftp to fetch files
 769  sub _ncftp_fetch {
 770      my $self = shift;
 771      my %hash = @_;
 772  
 773      my ($to);
 774      my $tmpl = {
 775          to  => { required => 1, store => \$to }
 776      };
 777      check( $tmpl, \%hash ) or return;
 778  
 779      ### we can only set passive mode in interactive sesssions, so bail out
 780      ### if $FTP_PASSIVE is set
 781      return if $FTP_PASSIVE;
 782  
 783      ### see if we have a ncftp binary ###
 784      if( my $ncftp = can_run('ncftp') ) {
 785  
 786          my $cmd = [
 787              $ncftp,
 788              '-V',                   # do not be verbose
 789              '-p', $FROM_EMAIL,      # email as password
 790              $self->host,            # hostname
 791              dirname($to),           # local dir for the file
 792                                      # remote path to the file
 793              ### DO NOT quote things for IPC::Run, it breaks stuff.
 794              $IPC::Cmd::USE_IPC_RUN
 795                          ? File::Spec::Unix->catdir( $self->path, $self->file )
 796                          : QUOTE. File::Spec::Unix->catdir( 
 797                                          $self->path, $self->file ) .QUOTE
 798              
 799          ];
 800  
 801          ### shell out ###
 802          my $captured;
 803          unless(run( command => $cmd,
 804                      buffer  => \$captured,
 805                      verbose => $DEBUG )
 806          ) {
 807              return $self->_error(loc("Command failed: %1", $captured || ''));
 808          }
 809  
 810          return $to;
 811  
 812      } else {
 813          $METHOD_FAIL->{'ncftp'} = 1;
 814          return;
 815      }
 816  }
 817  
 818  ### use /bin/curl to fetch files
 819  sub _curl_fetch {
 820      my $self = shift;
 821      my %hash = @_;
 822  
 823      my ($to);
 824      my $tmpl = {
 825          to  => { required => 1, store => \$to }
 826      };
 827      check( $tmpl, \%hash ) or return;
 828  
 829      if (my $curl = can_run('curl')) {
 830  
 831          ### these long opts are self explanatory - I like that -jmb
 832          my $cmd = [ $curl ];
 833  
 834          push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
 835  
 836          push(@$cmd, '--silent') unless $DEBUG;
 837  
 838          ### curl does the right thing with passive, regardless ###
 839          if ($self->scheme eq 'ftp') {
 840              push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
 841          }
 842  
 843          ### curl doesn't follow 302 (temporarily moved) etc automatically
 844          ### so we add --location to enable that.
 845          push @$cmd, '--fail', '--location', '--output', 
 846                      ### DO NOT quote things for IPC::Run, it breaks stuff.
 847                      $IPC::Cmd::USE_IPC_RUN
 848                          ? ($to, $self->uri)
 849                          : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
 850  
 851          my $captured;
 852          unless(run( command => $cmd,
 853                      buffer  => \$captured,
 854                      verbose => $DEBUG )
 855          ) {
 856  
 857              return $self->_error(loc("Command failed: %1", $captured || ''));
 858          }
 859  
 860          return $to;
 861  
 862      } else {
 863          $METHOD_FAIL->{'curl'} = 1;
 864          return;
 865      }
 866  }
 867  
 868  
 869  ### use File::Copy for fetching file:// urls ###
 870  ###
 871  ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
 872  ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
 873  ###
 874      
 875  sub _file_fetch {
 876      my $self = shift;
 877      my %hash = @_;
 878  
 879      my ($to);
 880      my $tmpl = {
 881          to  => { required => 1, store => \$to }
 882      };
 883      check( $tmpl, \%hash ) or return;
 884  
 885      
 886      
 887      ### prefix a / on unix systems with a file uri, since it would
 888      ### look somewhat like this:
 889      ###     file:///home/kane/file
 890      ### wheras windows file uris for 'c:\some\dir\file' might look like:
 891      ###     file:///C:/some/dir/file
 892      ###     file:///C|/some/dir/file
 893      ### or for a network share '\\host\share\some\dir\file':
 894      ###     file:////host/share/some/dir/file
 895      ###    
 896      ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
 897      ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
 898      ###
 899      
 900      my $path    = $self->path;
 901      my $vol     = $self->vol;
 902      my $share   = $self->share;
 903  
 904      my $remote;
 905      if (!$share and $self->host) {
 906          return $self->_error(loc( 
 907              "Currently %1 cannot handle hosts in %2 urls",
 908              'File::Fetch', 'file://'
 909          ));            
 910      }
 911      
 912      if( $vol ) {
 913          $path   = File::Spec->catdir( split /\//, $path );
 914          $remote = File::Spec->catpath( $vol, $path, $self->file);
 915  
 916      } elsif( $share ) {
 917          ### win32 specific, and a share name, so we wont bother with File::Spec
 918          $path   =~ s|/+|\\|g;
 919          $remote = "\\\\".$self->host."\\$share\\$path";
 920  
 921      } else {
 922          ### File::Spec on VMS can not currently handle UNIX syntax.
 923          my $file_class = ON_VMS
 924              ? 'File::Spec::Unix'
 925              : 'File::Spec';
 926  
 927          $remote  = $file_class->catfile( $path, $self->file );
 928      }
 929  
 930      ### File::Copy is littered with 'die' statements :( ###
 931      my $rv = eval { File::Copy::copy( $remote, $to ) };
 932  
 933      ### something went wrong ###
 934      if( !$rv or $@ ) {
 935          return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
 936                               $remote, $to, $!, $@));
 937      }
 938  
 939      return $to;
 940  }
 941  
 942  ### use /usr/bin/rsync to fetch files
 943  sub _rsync_fetch {
 944      my $self = shift;
 945      my %hash = @_;
 946  
 947      my ($to);
 948      my $tmpl = {
 949          to  => { required => 1, store => \$to }
 950      };
 951      check( $tmpl, \%hash ) or return;
 952  
 953      if (my $rsync = can_run('rsync')) {
 954  
 955          my $cmd = [ $rsync ];
 956  
 957          ### XXX: rsync has no I/O timeouts at all, by default
 958          push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
 959  
 960          push(@$cmd, '--quiet') unless $DEBUG;
 961  
 962          ### DO NOT quote things for IPC::Run, it breaks stuff.
 963          push @$cmd, $IPC::Cmd::USE_IPC_RUN
 964                          ? ($self->uri, $to)
 965                          : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE);
 966  
 967          my $captured;
 968          unless(run( command => $cmd,
 969                      buffer  => \$captured,
 970                      verbose => $DEBUG )
 971          ) {
 972  
 973              return $self->_error(loc("Command %1 failed: %2", 
 974                  "@$cmd" || '', $captured || ''));
 975          }
 976  
 977          return $to;
 978  
 979      } else {
 980          $METHOD_FAIL->{'rsync'} = 1;
 981          return;
 982      }
 983  }
 984  
 985  #################################
 986  #
 987  # Error code
 988  #
 989  #################################
 990  
 991  =pod
 992  
 993  =head2 $ff->error([BOOL])
 994  
 995  Returns the last encountered error as string.
 996  Pass it a true value to get the C<Carp::longmess()> output instead.
 997  
 998  =cut
 999  
1000  ### error handling the way Archive::Extract does it
1001  sub _error {
1002      my $self    = shift;
1003      my $error   = shift;
1004      
1005      $self->_error_msg( $error );
1006      $self->_error_msg_long( Carp::longmess($error) );
1007      
1008      if( $WARN ) {
1009          carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1010      }
1011  
1012      return;
1013  }
1014  
1015  sub error {
1016      my $self = shift;
1017      return shift() ? $self->_error_msg_long : $self->_error_msg;
1018  }
1019  
1020  
1021  1;
1022  
1023  =pod
1024  
1025  =head1 HOW IT WORKS
1026  
1027  File::Fetch is able to fetch a variety of uris, by using several
1028  external programs and modules.
1029  
1030  Below is a mapping of what utilities will be used in what order
1031  for what schemes, if available:
1032  
1033      file    => LWP, file
1034      http    => LWP, wget, curl, lynx
1035      ftp     => LWP, Net::FTP, wget, curl, ncftp, ftp
1036      rsync   => rsync
1037  
1038  If you'd like to disable the use of one or more of these utilities
1039  and/or modules, see the C<$BLACKLIST> variable further down.
1040  
1041  If a utility or module isn't available, it will be marked in a cache
1042  (see the C<$METHOD_FAIL> variable further down), so it will not be
1043  tried again. The C<fetch> method will only fail when all options are
1044  exhausted, and it was not able to retrieve the file.
1045  
1046  A special note about fetching files from an ftp uri:
1047  
1048  By default, all ftp connections are done in passive mode. To change
1049  that, see the C<$FTP_PASSIVE> variable further down.
1050  
1051  Furthermore, ftp uris only support anonymous connections, so no
1052  named user/password pair can be passed along.
1053  
1054  C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1055  further down.
1056  
1057  =head1 GLOBAL VARIABLES
1058  
1059  The behaviour of File::Fetch can be altered by changing the following
1060  global variables:
1061  
1062  =head2 $File::Fetch::FROM_EMAIL
1063  
1064  This is the email address that will be sent as your anonymous ftp
1065  password.
1066  
1067  Default is C<File-Fetch@example.com>.
1068  
1069  =head2 $File::Fetch::USER_AGENT
1070  
1071  This is the useragent as C<LWP> will report it.
1072  
1073  Default is C<File::Fetch/$VERSION>.
1074  
1075  =head2 $File::Fetch::FTP_PASSIVE
1076  
1077  This variable controls whether the environment variable C<FTP_PASSIVE>
1078  and any passive switches to commandline tools will be set to true.
1079  
1080  Default value is 1.
1081  
1082  Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1083  files, since passive mode can only be set interactively for this binary
1084  
1085  =head2 $File::Fetch::TIMEOUT
1086  
1087  When set, controls the network timeout (counted in seconds).
1088  
1089  Default value is 0.
1090  
1091  =head2 $File::Fetch::WARN
1092  
1093  This variable controls whether errors encountered internally by
1094  C<File::Fetch> should be C<carp>'d or not.
1095  
1096  Set to false to silence warnings. Inspect the output of the C<error()>
1097  method manually to see what went wrong.
1098  
1099  Defaults to C<true>.
1100  
1101  =head2 $File::Fetch::DEBUG
1102  
1103  This enables debugging output when calling commandline utilities to
1104  fetch files.
1105  This also enables C<Carp::longmess> errors, instead of the regular
1106  C<carp> errors.
1107  
1108  Good for tracking down why things don't work with your particular
1109  setup.
1110  
1111  Default is 0.
1112  
1113  =head2 $File::Fetch::BLACKLIST
1114  
1115  This is an array ref holding blacklisted modules/utilities for fetching
1116  files with.
1117  
1118  To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1119  set $File::Fetch::BLACKLIST to:
1120  
1121      $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1122  
1123  The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1124  
1125  See the note on C<MAPPING> below.
1126  
1127  =head2 $File::Fetch::METHOD_FAIL
1128  
1129  This is a hashref registering what modules/utilities were known to fail
1130  for fetching files (mostly because they weren't installed).
1131  
1132  You can reset this cache by assigning an empty hashref to it, or
1133  individually remove keys.
1134  
1135  See the note on C<MAPPING> below.
1136  
1137  =head1 MAPPING
1138  
1139  
1140  Here's a quick mapping for the utilities/modules, and their names for
1141  the $BLACKLIST, $METHOD_FAIL and other internal functions.
1142  
1143      LWP         => lwp
1144      Net::FTP    => netftp
1145      wget        => wget
1146      lynx        => lynx
1147      ncftp       => ncftp
1148      ftp         => ftp
1149      curl        => curl
1150      rsync       => rsync
1151  
1152  =head1 FREQUENTLY ASKED QUESTIONS
1153  
1154  =head2 So how do I use a proxy with File::Fetch?
1155  
1156  C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1157  You will need to set your environment variables accordingly. For
1158  example, to use an ftp proxy:
1159  
1160      $ENV{ftp_proxy} = 'foo.com';
1161  
1162  Refer to the LWP::UserAgent manpage for more details.
1163  
1164  =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1165  
1166  C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1167  which we in turn capture. If that content is a 'custom' error file
1168  (like, say, a C<404 handler>), you will get that contents instead.
1169  
1170  Sadly, C<lynx> doesn't support any options to return a different exit
1171  code on non-C<200 OK> status, giving us no way to tell the difference
1172  between a 'successfull' fetch and a custom error page.
1173  
1174  Therefor, we recommend to only use C<lynx> as a last resort. This is 
1175  why it is at the back of our list of methods to try as well.
1176  
1177  =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1178  
1179  C<File::Fetch> is relatively smart about things. When trying to write 
1180  a file to disk, it removes the C<query parameters> (see the 
1181  C<output_file> method for details) from the file name before creating
1182  it. In most cases this suffices.
1183  
1184  If you have any other characters you need to escape, please install 
1185  the C<URI::Escape> module from CPAN, and pre-encode your URI before
1186  passing it to C<File::Fetch>. You can read about the details of URIs 
1187  and URI encoding here:
1188  
1189    http://www.faqs.org/rfcs/rfc2396.html
1190  
1191  =head1 TODO
1192  
1193  =over 4
1194  
1195  =item Implement $PREFER_BIN
1196  
1197  To indicate to rather use commandline tools than modules
1198  
1199  =back
1200  
1201  =head1 BUG REPORTS
1202  
1203  Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1204  
1205  =head1 AUTHOR
1206  
1207  This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1208  
1209  =head1 COPYRIGHT
1210  
1211  This library is free software; you may redistribute and/or modify it 
1212  under the same terms as Perl itself.
1213  
1214  
1215  =cut
1216  
1217  # Local variables:
1218  # c-indentation-style: bsd
1219  # c-basic-offset: 4
1220  # indent-tabs-mode: nil
1221  # End:
1222  # vim: expandtab shiftwidth=4:
1223  
1224  
1225  
1226  


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1