diff options
author | Daniel Friesel <derf@derf.homelinux.org> | 2010-06-06 16:18:02 +0200 |
---|---|---|
committer | Daniel Friesel <derf@derf.homelinux.org> | 2010-06-06 16:18:02 +0200 |
commit | dde575a19651589a65f809f67f140978f97a8eda (patch) | |
tree | a65cb8bcb8d100f14db21e1c8ea6f7ae66d924ca | |
parent | 8413c8a433a7f97aba2f257140ed53f46eda6885 (diff) |
Use HTML::TreeBuilder::LibXML. Major performance enhancement.
-rwxr-xr-x | bin/efa | 19 | ||||
-rw-r--r-- | test/parse_ambiguous | 14 |
2 files changed, 18 insertions, 15 deletions
```diff
@@ -7,7 +7,7 @@ use warnings;
 use 5.010;
 
 use Getopt::Long qw/:config no_ignore_case/;
-use HTML::TreeBuilder::XPath;
+use HTML::TreeBuilder::LibXML;
 use WWW::Mechanize;
 
 my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr';
@@ -37,8 +37,9 @@ sub check_ambiguous {
 				"Ambiguous input for %s\n",
 				$select->attr('name'),
 			);
-			foreach my $val ($select->findnodes_as_strings('./option')) {
-				say "\t$val";
+			foreach my $val ($select->findnodes('./option')) {
+				print "\t";
+				say $val->as_trimmed_text();
 			}
 		}
 	if ($ambiguous) {
@@ -77,7 +78,7 @@ sub display_connection {
 	}
 
 	printf(
-		"%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n",
+		"%-5s %-2s %-30s %-20s %s\n%-5s %-2s %-30s\n\n",
 		@{$con}[0, 1, 2, 3, 7, 4, 5, 6],
 	)
 }
@@ -209,7 +210,7 @@ sub parse_tree {
 		}
 
 		if ($colspan == 8) {
-			if ($td->as_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
+			if ($td->as_trimmed_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) {
 				$con_no = $+{'no'} - 1;
 				$con_part = 0;
 				next;
@@ -225,8 +226,8 @@ sub parse_tree {
 			}
 		}
 
-		if (not $td->exists('./img') and $td->as_text() !~ /^\s*$/) {
-			push(@{$cons->[$con_no]->[$con_part]}, $td->as_text());
+		if (not @{$td->findnodes('./img')} and $td->as_text() !~ /^\s*$/) {
+			push(@{$cons->[$con_no]->[$con_part]}, $td->as_trimmed_text());
 		}
 	}
 	return $cons;
@@ -325,7 +326,9 @@ if ($test_dump) {
 	exit 0
 }
 
-my $tree = HTML::TreeBuilder::XPath->new_from_content($content);
+my $tree = HTML::TreeBuilder::LibXML->new();
+$tree->parse($content);
+$tree->eof();
 
 check_ambiguous($tree);
 
diff --git a/test/parse_ambiguous b/test/parse_ambiguous
index 0dd2043..7b17b37 100644
--- a/test/parse_ambiguous
+++ b/test/parse_ambiguous
@@ -1,9 +1,9 @@
 Ambiguous input for name_origin
-	Bredeney
-	Bredeney Friedhof
-	Bredeneyer Kreuz
+	Bredeney
+	Bredeney Friedhof
+	Bredeneyer Kreuz
 Ambiguous input for name_destination
-	Werden Brücke
-	Werden S
-	Werdener Markt
-	Werdener Str.
+	Werden Brücke
+	Werden S
+	Werdener Markt
+	Werdener Str.
```