summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Friesel <derf@derf.homelinux.org> 2010-06-06 16:18:02 +0200
committer: Daniel Friesel <derf@derf.homelinux.org> 2010-06-06 16:18:02 +0200
commit: dde575a19651589a65f809f67f140978f97a8eda (patch)
tree: a65cb8bcb8d100f14db21e1c8ea6f7ae66d924ca
parent: 8413c8a433a7f97aba2f257140ed53f46eda6885 (diff)
Use HTML::TreeBuilder::LibXML. Major performance enhancement.
-rwxr-xr-x  bin/efa  19
-rw-r--r--  test/parse_ambiguous  14
2 files changed, 18 insertions, 15 deletions
diff --git a/bin/efa b/bin/efa
index 764cfa4..49b658f 100755
--- a/bin/efa
+++ b/bin/efa
@@ -7,7 +7,7 @@ use warnings;
use 5.010;
use Getopt::Long qw/:config no_ignore_case/;
-use HTML::TreeBuilder::XPath;
+use HTML::TreeBuilder::LibXML;
use WWW::Mechanize;
my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
@@ -37,8 +37,9 @@ sub check_ambiguous {
"Ambiguous input for %s\n",
$select->attr('name'),
);
- foreach my $val ($select->findnodes_as_strings('./option')) {
- say "\t$val";
+ foreach my $val ($select->findnodes('./option')) {
+ print "\t";
+ say $val->as_trimmed_text();
}
}
if ($ambiguous) {
@@ -77,7 +78,7 @@ sub display_connection {
}
printf(
- "%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n",
+ "%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n",
@{$con}[0, 1, 2, 3, 7, 4, 5, 6],
)
}
@@ -209,7 +210,7 @@ sub parse_tree {
}
if ($colspan == 8) {
- if ($td->as_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
+ if ($td->as_trimmed_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
$con_no = $+{'no'} - 1;
$con_part = 0;
next;
@@ -225,8 +226,8 @@ sub parse_tree {
}
}
- if (not $td->exists('./img') and $td->as_text() !~ /^\s*$/) {
- push(@{$cons->[$con_no]->[$con_part]}, $td->as_text());
+ if (not @{$td->findnodes('./img')} and $td->as_text() !~ /^\s*$/) {
+ push(@{$cons->[$con_no]->[$con_part]}, $td->as_trimmed_text());
}
}
return $cons;
@@ -325,7 +326,9 @@ if ($test_dump) {
exit 0
}
-my $tree = HTML::TreeBuilder::XPath->new_from_content($content);
+my $tree = HTML::TreeBuilder::LibXML->new();
+$tree->parse($content);
+$tree->eof();
check_ambiguous($tree);
diff --git a/test/parse_ambiguous b/test/parse_ambiguous
index 0dd2043..7b17b37 100644
--- a/test/parse_ambiguous
+++ b/test/parse_ambiguous
@@ -1,9 +1,9 @@
Ambiguous input for name_origin
- Bredeney
- Bredeney Friedhof
- Bredeneyer Kreuz
+ Bredeney
+ Bredeney Friedhof
+ Bredeneyer Kreuz
Ambiguous input for name_destination
- Werden Brücke
- Werden S
- Werdener Markt
- Werdener Str.
+ Werden Brücke
+ Werden S
+ Werdener Markt
+ Werdener Str.