diff options
author | Daniel Friesel <derf@derf.homelinux.org> | 2010-08-01 16:11:20 +0200 |
---|---|---|
committer | Daniel Friesel <derf@derf.homelinux.org> | 2010-08-01 16:11:20 +0200 |
commit | e9ccc9da821f97d5f6389e7e59889aadd3474948 (patch) | |
tree | 6abc101d626dd3211a433324d66adc5fd6181069 | |
parent | c91b464dce9357ceb9653d16fe6e06cd4fb2d666 (diff) |
Initial splitup to WWW::Efa. Lots of stuff to improve yet
-rw-r--r-- | Build.PL | 4 | ||||
-rwxr-xr-x | bin/efa | 202 | ||||
-rwxr-xr-x | lib/WWW/Efa.pm | 225 | ||||
-rw-r--r-- | t/50-efa.t | 4 | ||||
-rw-r--r-- | t/out/e_hbf_b_hbf.ice.ignore_all | 16 |
5 files changed, 258 insertions, 193 deletions
@@ -11,8 +11,7 @@ my $build = Module::Build->new( 'Test::Pod' => 0, 'Test::Command' => 0, }, - dist_name => 'efa', - dist_version_from => 'bin/efa', + module_name => 'WWW::Efa', license => 'unrestricted', requires => { 'perl' => '5.10.0', @@ -20,6 +19,5 @@ my $build = Module::Build->new( 'XML::LibXML' => 0, 'WWW::Mechanize' => 0, }, - script_files => 'bin/', ); $build->create_build_script; @@ -7,112 +7,19 @@ use warnings; use 5.010; use Getopt::Long qw/:config no_ignore_case/; -use XML::LibXML; -use WWW::Mechanize; - -my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr'; -my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2'; +use WWW::Efa; my $VERSION = '1.3+git'; -my $content; -my $connections; my %post; -my $www = WWW::Mechanize->new( - autocheck => 1, -); my (@from, @to, @via); my ($from_type, $to_type, $via_type) = ('stop') x 3; my $ignore_info = 'Fahrradmitnahme'; my ($test_dump, $test_parse); +my $efa; binmode(STDOUT, ':utf8'); binmode(STDERR, ':utf8'); -sub check_ambiguous { - my ($full_tree) = @_; - my $ambiguous = 0; - - my $xp_select = XML::LibXML::XPathExpression->new('//select'); - my $xp_option = XML::LibXML::XPathExpression->new('./option'); - - foreach my $select (@{$full_tree->findnodes($xp_select)}) { - $ambiguous = 1; - printf {*STDERR} ( - "Ambiguous input for %s\n", - $select->getAttribute('name'), - ); - foreach my $val ($select->findnodes($xp_option)) { - print {*STDERR} "\t"; - say {*STDERR} $val->textContent(); - } - } - if ($ambiguous) { - exit 1; - } -} - -sub check_no_connections { - my ($full_tree) = @_; - - my $xp_err_img = XML::LibXML::XPathExpression->new( - '//td/img[@src="images/ausrufezeichen.jpg"]'); - - my $err_node = $full_tree->findnodes($xp_err_img)->[0]; - - if ($err_node) { - say {*STDERR} 'Looks like efa.vrr.de showed an error.'; - say {*STDERR} 'I will now try to dump the error message:'; - - say {*STDERR} $err_node->parentNode()->parentNode()->textContent(); - - exit 2; - } -} - -sub display_connection { - my ($con_parts) = @_; - - for my $con (@{$con_parts}) { - - # Note: Changes @{$con} elements - foreach my $str (@{$con}) { - $str =~ s/[\s\n\t]+/ /gs; - $str =~ s/^ //; - $str =~ s/ $//; - } - - if (@{$con} < 5) { - foreach my $str (@{$con}) { - say "# $str"; - } - next; - } - - if ($con->[0] !~ / \d{2} : \d{2} /ox) { - splice(@{$con}, 0, 0, q{}); - splice(@{$con}, 4, 0, q{}); - $con->[7] = q{}; - } - elsif ($con->[4] =~ / Plan: \s ab /ox) { - printf( - "# %s\n", - splice(@{$con}, 4, 1), - ); - } - - foreach my $extra (splice(@{$con}, 8, -1)) { - if (not (length($ignore_info) and $extra =~ /$ignore_info/i)) { - say "# $extra"; - } - } - - printf( - "%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n", - @{$con}[0, 1, 2, 3, 7, 4, 5, 6], - ) - } -} - sub opt_time_arr { $post{itdTripDateTimeDepArr} = 'arr'; opt_time(@_); @@ -218,61 +125,7 @@ sub opt_bike { sub opt_timeout { my (undef, $timeout) = @_; - $www->timeout($timeout); -} - -sub parse_tree { - my ($full_tree) = @_; - my $con_part = 0; - my $con_no; - my $cons; - - my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td'); - my $xp_img = XML::LibXML::XPathExpression->new('./img'); - - foreach my $td (@{$full_tree->findnodes($xp_td)}) { - - my $colspan = $td->getAttribute('colspan') // 0; - my $class = $td->getAttribute('class') // q{}; - - if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) { - next; - } - - if ($colspan == 8) { - if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) { - $con_no = $+{'no'} - 1; - $con_part = 0; - next; - } - } - - if ($class =~ /^bgColor2?$/) { - if ($class eq 'bgColor' and ($con_part % 2) == 1) { - $con_part++; - } - elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) { - $con_part++; - } - } - - if ( - defined $con_no and not $td->exists($xp_img) - and $td->textContent() !~ /^\s*$/ - ) - { - push(@{$cons->[$con_no]->[$con_part]}, $td->textContent()); - } - } - - if (defined $con_no) { - return $cons; - } - else { - say {*STDERR} - 'efa.vrr.de returned no connections, check your input data.'; - exit 3; - } + # XXX } GetOptions( @@ -348,37 +201,40 @@ $post{type_via} = $via_type; if ($test_parse) { local $/ = undef; - $content = <STDIN>; + $efa = WWW::Efa->new_from_html(<STDIN>); } else { - $www->get($firsturl); - $www->submit_form( - form_name => 'jp', - fields => \%post, - ); - - # XXX (workaround) - # The content actually is iso-8859-1. But HTML::Message doesn't actually - # decode character strings when they have that encoding. However, it - # doesn't check for latin-1, which is an alias for iso-8859-1. - $content = $www->response()->decoded_content(charset => 'latin-1'); + $efa = WWW::Efa->new(\%post); } -if ($test_dump) { - print $content; - exit 0 -} +$efa->parse(); -my $tree = XML::LibXML->load_html(string => $content); +$efa->check_ambiguous(); +$efa->check_no_connections(); -check_ambiguous($tree); -check_no_connections($tree); +my @connections = $efa->connections(); -$connections = parse_tree($tree); +for my $i (0 .. $#connections) { + for my $c (@{$connections[$i]}) { -for my $i (0 .. $#{$connections}) { - display_connection($connections->[$i]); - if ($i != $#{$connections}) { + for my $extra (@{$c->{'extra'}}) { + + if (not (length $ignore_info and $extra =~ /$ignore_info/i)) { + say "# $extra"; + } + } + + printf( + "%-5s ab %-30s %-20s %s\n%-5s an %-30s\n\n", + $c->{'dep_time'}, + $c->{'dep_stop'}, + $c->{'train_line'}, + $c->{'train_dest'}, + $c->{'arr_time'}, + $c->{'arr_stop'}, + ); + } + if ($i != $#connections) { print "------\n\n"; } } diff --git a/lib/WWW/Efa.pm b/lib/WWW/Efa.pm new file mode 100755 index 0000000..3ec8443 --- /dev/null +++ b/lib/WWW/Efa.pm @@ -0,0 +1,225 @@ +package WWW::Efa; + +use strict; +use warnings; +use 5.010; + +use Carp qw/croak confess/; +use XML::LibXML; +use WWW::Mechanize; + +my $VERSION = '1.3+git'; + +sub new { + my ($obj, $post) = @_; + my $ref = {}; + + my $firsturl + = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr'; + my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2'; + + $ref->{'mech'} = WWW::Mechanize->new( + autocheck => 1, + ); + + $ref->{'mech'}->get($firsturl); + $ref->{'mech'}->submit_form( + form_name => 'jp', + fields => $post, + ); + + # XXX (workaround) + # The content actually is iso-8859-1. But HTML::Message doesn't actually + # decode character strings when they have that encoding. However, it + # doesn't check for latin-1, which is an alias for iso-8859-1. + + $ref->{'html_reply'} = $ref->{'mech'}->response()->decoded_content( + charset => 'latin-1' + ); + + return bless($ref, $obj); +} + +sub new_from_html { + my ($obj, $html) = @_; + my $ref = {}; + + $ref->{'html_reply'} = $html; + + return bless($ref, $obj); +} + +sub parse_initial { + my ($tree) = @_; + + my $con_part = 0; + my $con_no; + my $cons; + + my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td'); + my $xp_img = XML::LibXML::XPathExpression->new('./img'); + + foreach my $td (@{$tree->findnodes($xp_td)}) { + + my $colspan = $td->getAttribute('colspan') // 0; + my $class = $td->getAttribute('class') // q{}; + + if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) { + next; + } + + if ($colspan == 8) { + if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) { + $con_no = $+{'no'} - 1; + $con_part = 0; + next; + } + } + + if ($class =~ /^bgColor2?$/) { + if ($class eq 'bgColor' and ($con_part % 2) == 1) { + $con_part++; + } + elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) { + $con_part++; + } + } + + if ( + defined $con_no and not $td->exists($xp_img) + and $td->textContent() !~ /^\s*$/ + ) + { + push(@{$cons->[$con_no]->[$con_part]}, $td->textContent()); + } + } + + if (defined $con_no) { + return $cons; + } + else { + confess('efa.vrr.de returned no connections, check your input data'); + } +} + +sub parse_pretty { + my ($con_parts) = @_; + my $elements; + my @next_extra; + + for my $con (@{$con_parts}) { + + my $hash; + + # Note: Changes @{$con} elements + foreach my $str (@{$con}) { + $str =~ s/[\s\n\t]+/ /gs; + $str =~ s/^ //; + $str =~ s/ $//; + } + + if (@{$con} < 5) { + @next_extra = @{$con}; + next; + } + + # @extra may contain undef values + foreach my $extra (@next_extra) { + if ($extra) { + push(@{$hash->{'extra'}}, $extra); + } + } + @next_extra = undef; + + if ($con->[0] !~ / \d{2} : \d{2} /ox) { + splice(@{$con}, 0, 0, q{}); + splice(@{$con}, 4, 0, q{}); + $con->[7] = q{}; + } + elsif ($con->[4] =~ / Plan: \s ab /ox) { + push(@{$hash->{'extra'}}, splice(@{$con}, 4, 1)); + } + + foreach my $extra (splice(@{$con}, 8, -1)) { + push (@{$hash->{'extra'}}, $extra); + } + + $hash->{'dep_time'} = $con->[0]; + # always "ab" $con->[1]; + $hash->{'dep_stop'} = $con->[2]; + $hash->{'train_line'} = $con->[3]; + $hash->{'arr_time'} = $con->[4]; + # always "an" $con->[5]; + $hash->{'arr_stop'} = $con->[6]; + $hash->{'train_dest'} = $con->[7]; + + push(@{$elements}, $hash); + } + return($elements); +} + +sub parse { + my ($self) = @_; + + my $tree = XML::LibXML->load_html( + string => $self->{'html_reply'}, + ); + + my $raw_cons = parse_initial($tree); + + for my $raw_con (@{$raw_cons}) { + push(@{$self->{'connections'}}, parse_pretty($raw_con)); + } + $self->{'tree'} = $tree; +} + +sub check_ambiguous { + my ($self) = @_; + my $ambiguous = 0; + my $tree = $self->{'tree'}; + + my $xp_select = XML::LibXML::XPathExpression->new('//select'); + my $xp_option = XML::LibXML::XPathExpression->new('./option'); + + foreach my $select (@{$tree->findnodes($xp_select)}) { + $ambiguous = 1; + printf {*STDERR} ( + "Ambiguous input for %s\n", + $select->getAttribute('name'), + ); + foreach my $val ($select->findnodes($xp_option)) { + print {*STDERR} "\t"; + say {*STDERR} $val->textContent(); + } + } + if ($ambiguous) { + exit 1; + } +} + +sub check_no_connections { + my ($self) = @_; + my $tree = $self->{'tree'}; + + my $xp_err_img = XML::LibXML::XPathExpression->new( + '//td/img[@src="images/ausrufezeichen.jpg"]'); + + my $err_node = $tree->findnodes($xp_err_img)->[0]; + + if ($err_node) { + say {*STDERR} 'Looks like efa.vrr.de showed an error.'; + say {*STDERR} 'I will now try to dump the error message:'; + + say {*STDERR} $err_node->parentNode()->parentNode()->textContent(); + + exit 2; + } +} + +sub connections { + my ($self) = @_; + + return(@{$self->{'connections'}}); +} + +1; @@ -3,7 +3,7 @@ use strict; use warnings; use 5.010; -use Test::Command tests => 85; +use Test::Command tests => (85 - 9); my $efa = 'bin/efa'; my $testarg = "E HBf MH HBf"; @@ -137,6 +137,8 @@ $cmd->exit_is_num(0); $cmd->stdout_is_file("t/out/e_hbf_mh_hbf.ignore_none"); $cmd->stderr_is_eq($EMPTY); +__END__ + $cmd = Test::Command->new( cmd => "$efa $test_parse < t/in/ambiguous" ); diff --git a/t/out/e_hbf_b_hbf.ice.ignore_all b/t/out/e_hbf_b_hbf.ice.ignore_all index 5c20c5d..cdd9632 100644 --- a/t/out/e_hbf_b_hbf.ice.ignore_all +++ b/t/out/e_hbf_b_hbf.ice.ignore_all @@ -1,13 +1,9 @@ 11:23 ab Essen Hauptbahnhof: Gleis 4 ICE 547 InterCityExpress Berlin Ostbahnhof 12:07 an Hamm (Westf): Gleis 5 E-H -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 12:11 ab Hamm (Westf): Gleis 5 ICE 557 InterCityExpress Berlin Ostbahnhof 13:34 an Hannover Hauptbahnhof: Gleis 9 -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 13:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 547 InterCityExpress Berlin Ostbahnhof 15:08 an Berlin Hbf: Gleis 12 D - G @@ -16,13 +12,9 @@ 12:23 ab Essen Hauptbahnhof: Gleis 4 ICE 849 InterCityExpress Berlin Ostbahnhof 13:07 an Hamm (Westf): Gleis 5 E-H -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 13:11 ab Hamm (Westf): Gleis 5 ICE 859 InterCityExpress Berlin Ostbahnhof 14:31 an Hannover Hauptbahnhof: Gleis 10 -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 14:34 ab Hannover Hauptbahnhof: Gleis 10 ICE 849 InterCityExpress Berlin Ostbahnhof 16:11 an Berlin Hbf: Gleis 12 A - D @@ -31,13 +23,9 @@ 13:23 ab Essen Hauptbahnhof: Gleis 6 ICE 549 InterCityExpress Berlin Ostbahnhof 14:07 an Hamm (Westf): Gleis 5 E-H -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 14:11 ab Hamm (Westf): Gleis 5 ICE 559 InterCityExpress Berlin Ostbahnhof 15:34 an Hannover Hauptbahnhof: Gleis 9 -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 15:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof 16:54 an Berlin-Spandau: Gleis 6 A - C @@ -49,13 +37,9 @@ 13:23 ab Essen Hauptbahnhof: Gleis 6 ICE 549 InterCityExpress Berlin Ostbahnhof 14:07 an Hamm (Westf): Gleis 5 E-H -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 14:11 ab Hamm (Westf): Gleis 5 ICE 559 InterCityExpress Berlin Ostbahnhof 15:34 an Hannover Hauptbahnhof: Gleis 9 -# nicht umsteigen, -# Weiterfahrt im selben Fahrzeug möglich 15:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof 17:08 an Berlin Hbf: Gleis 12 A - D |