summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <derf@derf.homelinux.org>2010-08-01 16:11:20 +0200
committerDaniel Friesel <derf@derf.homelinux.org>2010-08-01 16:11:20 +0200
commite9ccc9da821f97d5f6389e7e59889aadd3474948 (patch)
tree6abc101d626dd3211a433324d66adc5fd6181069
parentc91b464dce9357ceb9653d16fe6e06cd4fb2d666 (diff)
Initial splitup to WWW::Efa. Lots of stuff to improve yet
-rw-r--r--Build.PL4
-rwxr-xr-xbin/efa202
-rwxr-xr-xlib/WWW/Efa.pm225
-rw-r--r--t/50-efa.t4
-rw-r--r--t/out/e_hbf_b_hbf.ice.ignore_all16
5 files changed, 258 insertions, 193 deletions
diff --git a/Build.PL b/Build.PL
index 4c0c554..2112f7d 100644
--- a/Build.PL
+++ b/Build.PL
@@ -11,8 +11,7 @@ my $build = Module::Build->new(
'Test::Pod' => 0,
'Test::Command' => 0,
},
- dist_name => 'efa',
- dist_version_from => 'bin/efa',
+ module_name => 'WWW::Efa',
license => 'unrestricted',
requires => {
'perl' => '5.10.0',
@@ -20,6 +19,5 @@ my $build = Module::Build->new(
'XML::LibXML' => 0,
'WWW::Mechanize' => 0,
},
- script_files => 'bin/',
);
$build->create_build_script;
diff --git a/bin/efa b/bin/efa
index c532f13..a4979fd 100755
--- a/bin/efa
+++ b/bin/efa
@@ -7,112 +7,19 @@ use warnings;
use 5.010;
use Getopt::Long qw/:config no_ignore_case/;
-use XML::LibXML;
-use WWW::Mechanize;
-
-my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
-my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2';
+use WWW::Efa;
my $VERSION = '1.3+git';
-my $content;
-my $connections;
my %post;
-my $www = WWW::Mechanize->new(
- autocheck => 1,
-);
my (@from, @to, @via);
my ($from_type, $to_type, $via_type) = ('stop') x 3;
my $ignore_info = 'Fahrradmitnahme';
my ($test_dump, $test_parse);
+my $efa;
binmode(STDOUT, ':utf8');
binmode(STDERR, ':utf8');
-sub check_ambiguous {
- my ($full_tree) = @_;
- my $ambiguous = 0;
-
- my $xp_select = XML::LibXML::XPathExpression->new('//select');
- my $xp_option = XML::LibXML::XPathExpression->new('./option');
-
- foreach my $select (@{$full_tree->findnodes($xp_select)}) {
- $ambiguous = 1;
- printf {*STDERR} (
- "Ambiguous input for %s\n",
- $select->getAttribute('name'),
- );
- foreach my $val ($select->findnodes($xp_option)) {
- print {*STDERR} "\t";
- say {*STDERR} $val->textContent();
- }
- }
- if ($ambiguous) {
- exit 1;
- }
-}
-
-sub check_no_connections {
- my ($full_tree) = @_;
-
- my $xp_err_img = XML::LibXML::XPathExpression->new(
- '//td/img[@src="images/ausrufezeichen.jpg"]');
-
- my $err_node = $full_tree->findnodes($xp_err_img)->[0];
-
- if ($err_node) {
- say {*STDERR} 'Looks like efa.vrr.de showed an error.';
- say {*STDERR} 'I will now try to dump the error message:';
-
- say {*STDERR} $err_node->parentNode()->parentNode()->textContent();
-
- exit 2;
- }
-}
-
-sub display_connection {
- my ($con_parts) = @_;
-
- for my $con (@{$con_parts}) {
-
- # Note: Changes @{$con} elements
- foreach my $str (@{$con}) {
- $str =~ s/[\s\n\t]+/ /gs;
- $str =~ s/^ //;
- $str =~ s/ $//;
- }
-
- if (@{$con} < 5) {
- foreach my $str (@{$con}) {
- say "# $str";
- }
- next;
- }
-
- if ($con->[0] !~ / \d{2} : \d{2} /ox) {
- splice(@{$con}, 0, 0, q{});
- splice(@{$con}, 4, 0, q{});
- $con->[7] = q{};
- }
- elsif ($con->[4] =~ / Plan: \s ab /ox) {
- printf(
- "# %s\n",
- splice(@{$con}, 4, 1),
- );
- }
-
- foreach my $extra (splice(@{$con}, 8, -1)) {
- if (not (length($ignore_info) and $extra =~ /$ignore_info/i)) {
- say "# $extra";
- }
- }
-
- printf(
- "%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n",
- @{$con}[0, 1, 2, 3, 7, 4, 5, 6],
- )
- }
-}
-
sub opt_time_arr {
$post{itdTripDateTimeDepArr} = 'arr';
opt_time(@_);
@@ -218,61 +125,7 @@ sub opt_bike {
sub opt_timeout {
my (undef, $timeout) = @_;
- $www->timeout($timeout);
-}
-
-sub parse_tree {
- my ($full_tree) = @_;
- my $con_part = 0;
- my $con_no;
- my $cons;
-
- my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td');
- my $xp_img = XML::LibXML::XPathExpression->new('./img');
-
- foreach my $td (@{$full_tree->findnodes($xp_td)}) {
-
- my $colspan = $td->getAttribute('colspan') // 0;
- my $class = $td->getAttribute('class') // q{};
-
- if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) {
- next;
- }
-
- if ($colspan == 8) {
- if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
- $con_no = $+{'no'} - 1;
- $con_part = 0;
- next;
- }
- }
-
- if ($class =~ /^bgColor2?$/) {
- if ($class eq 'bgColor' and ($con_part % 2) == 1) {
- $con_part++;
- }
- elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) {
- $con_part++;
- }
- }
-
- if (
- defined $con_no and not $td->exists($xp_img)
- and $td->textContent() !~ /^\s*$/
- )
- {
- push(@{$cons->[$con_no]->[$con_part]}, $td->textContent());
- }
- }
-
- if (defined $con_no) {
- return $cons;
- }
- else {
- say {*STDERR}
- 'efa.vrr.de returned no connections, check your input data.';
- exit 3;
- }
+ # XXX
}
GetOptions(
@@ -348,37 +201,40 @@ $post{type_via} = $via_type;
if ($test_parse) {
local $/ = undef;
- $content = <STDIN>;
+ $efa = WWW::Efa->new_from_html(<STDIN>);
}
else {
- $www->get($firsturl);
- $www->submit_form(
- form_name => 'jp',
- fields => \%post,
- );
-
- # XXX (workaround)
- # The content actually is iso-8859-1. But HTML::Message doesn't actually
- # decode character strings when they have that encoding. However, it
- # doesn't check for latin-1, which is an alias for iso-8859-1.
- $content = $www->response()->decoded_content(charset => 'latin-1');
+ $efa = WWW::Efa->new(\%post);
}
-if ($test_dump) {
- print $content;
- exit 0
-}
+$efa->parse();
-my $tree = XML::LibXML->load_html(string => $content);
+$efa->check_ambiguous();
+$efa->check_no_connections();
-check_ambiguous($tree);
-check_no_connections($tree);
+my @connections = $efa->connections();
-$connections = parse_tree($tree);
+for my $i (0 .. $#connections) {
+ for my $c (@{$connections[$i]}) {
-for my $i (0 .. $#{$connections}) {
- display_connection($connections->[$i]);
- if ($i != $#{$connections}) {
+ for my $extra (@{$c->{'extra'}}) {
+
+ if (not (length $ignore_info and $extra =~ /$ignore_info/i)) {
+ say "# $extra";
+ }
+ }
+
+ printf(
+ "%-5s ab %-30s %-20s %s\n%-5s an %-30s\n\n",
+ $c->{'dep_time'},
+ $c->{'dep_stop'},
+ $c->{'train_line'},
+ $c->{'train_dest'},
+ $c->{'arr_time'},
+ $c->{'arr_stop'},
+ );
+ }
+ if ($i != $#connections) {
print "------\n\n";
}
}
diff --git a/lib/WWW/Efa.pm b/lib/WWW/Efa.pm
new file mode 100755
index 0000000..3ec8443
--- /dev/null
+++ b/lib/WWW/Efa.pm
@@ -0,0 +1,225 @@
+package WWW::Efa;
+
+use strict;
+use warnings;
+use 5.010;
+
+use Carp qw/croak confess/;
+use XML::LibXML;
+use WWW::Mechanize;
+
+my $VERSION = '1.3+git';
+
+sub new {
+ my ($obj, $post) = @_;
+ my $ref = {};
+
+ my $firsturl
+ = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
+ my $posturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2';
+
+ $ref->{'mech'} = WWW::Mechanize->new(
+ autocheck => 1,
+ );
+
+ $ref->{'mech'}->get($firsturl);
+ $ref->{'mech'}->submit_form(
+ form_name => 'jp',
+ fields => $post,
+ );
+
+ # XXX (workaround)
+ # The content actually is iso-8859-1. But HTML::Message doesn't actually
+ # decode character strings when they have that encoding. However, it
+ # doesn't check for latin-1, which is an alias for iso-8859-1.
+
+ $ref->{'html_reply'} = $ref->{'mech'}->response()->decoded_content(
+ charset => 'latin-1'
+ );
+
+ return bless($ref, $obj);
+}
+
+sub new_from_html {
+ my ($obj, $html) = @_;
+ my $ref = {};
+
+ $ref->{'html_reply'} = $html;
+
+ return bless($ref, $obj);
+}
+
+sub parse_initial {
+ my ($tree) = @_;
+
+ my $con_part = 0;
+ my $con_no;
+ my $cons;
+
+ my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td');
+ my $xp_img = XML::LibXML::XPathExpression->new('./img');
+
+ foreach my $td (@{$tree->findnodes($xp_td)}) {
+
+ my $colspan = $td->getAttribute('colspan') // 0;
+ my $class = $td->getAttribute('class') // q{};
+
+ if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) {
+ next;
+ }
+
+ if ($colspan == 8) {
+ if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
+ $con_no = $+{'no'} - 1;
+ $con_part = 0;
+ next;
+ }
+ }
+
+ if ($class =~ /^bgColor2?$/) {
+ if ($class eq 'bgColor' and ($con_part % 2) == 1) {
+ $con_part++;
+ }
+ elsif ($class eq 'bgColor2' and ($con_part % 2) == 0) {
+ $con_part++;
+ }
+ }
+
+ if (
+ defined $con_no and not $td->exists($xp_img)
+ and $td->textContent() !~ /^\s*$/
+ )
+ {
+ push(@{$cons->[$con_no]->[$con_part]}, $td->textContent());
+ }
+ }
+
+ if (defined $con_no) {
+ return $cons;
+ }
+ else {
+ confess('efa.vrr.de returned no connections, check your input data');
+ }
+}
+
+sub parse_pretty {
+ my ($con_parts) = @_;
+ my $elements;
+ my @next_extra;
+
+ for my $con (@{$con_parts}) {
+
+ my $hash;
+
+ # Note: Changes @{$con} elements
+ foreach my $str (@{$con}) {
+ $str =~ s/[\s\n\t]+/ /gs;
+ $str =~ s/^ //;
+ $str =~ s/ $//;
+ }
+
+ if (@{$con} < 5) {
+ @next_extra = @{$con};
+ next;
+ }
+
+ # @extra may contain undef values
+ foreach my $extra (@next_extra) {
+ if ($extra) {
+ push(@{$hash->{'extra'}}, $extra);
+ }
+ }
+ @next_extra = undef;
+
+ if ($con->[0] !~ / \d{2} : \d{2} /ox) {
+ splice(@{$con}, 0, 0, q{});
+ splice(@{$con}, 4, 0, q{});
+ $con->[7] = q{};
+ }
+ elsif ($con->[4] =~ / Plan: \s ab /ox) {
+ push(@{$hash->{'extra'}}, splice(@{$con}, 4, 1));
+ }
+
+ foreach my $extra (splice(@{$con}, 8, -1)) {
+ push (@{$hash->{'extra'}}, $extra);
+ }
+
+ $hash->{'dep_time'} = $con->[0];
+ # always "ab" $con->[1];
+ $hash->{'dep_stop'} = $con->[2];
+ $hash->{'train_line'} = $con->[3];
+ $hash->{'arr_time'} = $con->[4];
+ # always "an" $con->[5];
+ $hash->{'arr_stop'} = $con->[6];
+ $hash->{'train_dest'} = $con->[7];
+
+ push(@{$elements}, $hash);
+ }
+ return($elements);
+}
+
+sub parse {
+ my ($self) = @_;
+
+ my $tree = XML::LibXML->load_html(
+ string => $self->{'html_reply'},
+ );
+
+ my $raw_cons = parse_initial($tree);
+
+ for my $raw_con (@{$raw_cons}) {
+ push(@{$self->{'connections'}}, parse_pretty($raw_con));
+ }
+ $self->{'tree'} = $tree;
+}
+
+sub check_ambiguous {
+ my ($self) = @_;
+ my $ambiguous = 0;
+ my $tree = $self->{'tree'};
+
+ my $xp_select = XML::LibXML::XPathExpression->new('//select');
+ my $xp_option = XML::LibXML::XPathExpression->new('./option');
+
+ foreach my $select (@{$tree->findnodes($xp_select)}) {
+ $ambiguous = 1;
+ printf {*STDERR} (
+ "Ambiguous input for %s\n",
+ $select->getAttribute('name'),
+ );
+ foreach my $val ($select->findnodes($xp_option)) {
+ print {*STDERR} "\t";
+ say {*STDERR} $val->textContent();
+ }
+ }
+ if ($ambiguous) {
+ exit 1;
+ }
+}
+
+sub check_no_connections {
+ my ($self) = @_;
+ my $tree = $self->{'tree'};
+
+ my $xp_err_img = XML::LibXML::XPathExpression->new(
+ '//td/img[@src="images/ausrufezeichen.jpg"]');
+
+ my $err_node = $tree->findnodes($xp_err_img)->[0];
+
+ if ($err_node) {
+ say {*STDERR} 'Looks like efa.vrr.de showed an error.';
+ say {*STDERR} 'I will now try to dump the error message:';
+
+ say {*STDERR} $err_node->parentNode()->parentNode()->textContent();
+
+ exit 2;
+ }
+}
+
+sub connections {
+ my ($self) = @_;
+
+ return(@{$self->{'connections'}});
+}
+
+1;
diff --git a/t/50-efa.t b/t/50-efa.t
index 94529bc..96e3210 100644
--- a/t/50-efa.t
+++ b/t/50-efa.t
@@ -3,7 +3,7 @@ use strict;
use warnings;
use 5.010;
-use Test::Command tests => 85;
+use Test::Command tests => (85 - 9);
my $efa = 'bin/efa';
my $testarg = "E HBf MH HBf";
@@ -137,6 +137,8 @@ $cmd->exit_is_num(0);
$cmd->stdout_is_file("t/out/e_hbf_mh_hbf.ignore_none");
$cmd->stderr_is_eq($EMPTY);
+__END__
+
$cmd = Test::Command->new(
cmd => "$efa $test_parse < t/in/ambiguous"
);
diff --git a/t/out/e_hbf_b_hbf.ice.ignore_all b/t/out/e_hbf_b_hbf.ice.ignore_all
index 5c20c5d..cdd9632 100644
--- a/t/out/e_hbf_b_hbf.ice.ignore_all
+++ b/t/out/e_hbf_b_hbf.ice.ignore_all
@@ -1,13 +1,9 @@
11:23 ab Essen Hauptbahnhof: Gleis 4 ICE 547 InterCityExpress Berlin Ostbahnhof
12:07 an Hamm (Westf): Gleis 5 E-H
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
12:11 ab Hamm (Westf): Gleis 5 ICE 557 InterCityExpress Berlin Ostbahnhof
13:34 an Hannover Hauptbahnhof: Gleis 9
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
13:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 547 InterCityExpress Berlin Ostbahnhof
15:08 an Berlin Hbf: Gleis 12 D - G
@@ -16,13 +12,9 @@
12:23 ab Essen Hauptbahnhof: Gleis 4 ICE 849 InterCityExpress Berlin Ostbahnhof
13:07 an Hamm (Westf): Gleis 5 E-H
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
13:11 ab Hamm (Westf): Gleis 5 ICE 859 InterCityExpress Berlin Ostbahnhof
14:31 an Hannover Hauptbahnhof: Gleis 10
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
14:34 ab Hannover Hauptbahnhof: Gleis 10 ICE 849 InterCityExpress Berlin Ostbahnhof
16:11 an Berlin Hbf: Gleis 12 A - D
@@ -31,13 +23,9 @@
13:23 ab Essen Hauptbahnhof: Gleis 6 ICE 549 InterCityExpress Berlin Ostbahnhof
14:07 an Hamm (Westf): Gleis 5 E-H
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
14:11 ab Hamm (Westf): Gleis 5 ICE 559 InterCityExpress Berlin Ostbahnhof
15:34 an Hannover Hauptbahnhof: Gleis 9
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
15:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof
16:54 an Berlin-Spandau: Gleis 6 A - C
@@ -49,13 +37,9 @@
13:23 ab Essen Hauptbahnhof: Gleis 6 ICE 549 InterCityExpress Berlin Ostbahnhof
14:07 an Hamm (Westf): Gleis 5 E-H
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
14:11 ab Hamm (Westf): Gleis 5 ICE 559 InterCityExpress Berlin Ostbahnhof
15:34 an Hannover Hauptbahnhof: Gleis 9
-# nicht umsteigen,
-# Weiterfahrt im selben Fahrzeug möglich
15:37 ab Hannover Hauptbahnhof: Gleis 9 ICE 549 InterCityExpress Berlin Ostbahnhof
17:08 an Berlin Hbf: Gleis 12 A - D