From 04f76c53cad5469913723e966b7fae9d1b58ca14 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Sun, 6 Jun 2010 16:38:07 +0200 Subject: Use XML::LibXML directly --- bin/efa | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'bin') diff --git a/bin/efa b/bin/efa index 49b658f..d0892db 100755 --- a/bin/efa +++ b/bin/efa @@ -7,7 +7,7 @@ use warnings; use 5.010; use Getopt::Long qw/:config no_ignore_case/; -use HTML::TreeBuilder::LibXML; +use XML::LibXML; use WWW::Mechanize; my $firsturl = 'http://efa.vrr.de/vrr/XSLT_TRIP_REQUEST2?language=de&itdLPxx_transpCompany=vrr'; @@ -31,15 +31,18 @@ sub check_ambiguous { my ($full_tree) = @_; my $ambiguous = 0; - foreach my $select (@{$full_tree->findnodes('//select')}) { + my $xp_select = XML::LibXML::XPathExpression->new('//select'); + my $xp_option = XML::LibXML::XPathExpression->new('./option'); + + foreach my $select (@{$full_tree->findnodes($xp_select)}) { $ambiguous = 1; printf( "Ambiguous input for %s\n", - $select->attr('name'), + $select->getAttribute('name'), ); - foreach my $val ($select->findnodes('./option')) { + foreach my $val ($select->findnodes($xp_option)) { print "\t"; - say $val->as_trimmed_text(); + say $val->textContent(); } } if ($ambiguous) { @@ -52,6 +55,13 @@ sub display_connection { for my $con (@{$con_parts}) { + # Note: Idets @{$con} elements + foreach my $str (@{$con}) { + $str =~ s/[\s\n\t]+/ /gs; + $str =~ s/^ //; + $str =~ s/ $//; + } + if (@{$con} < 5) { foreach my $str (@{$con}) { say "# $str"; @@ -198,19 +208,20 @@ sub parse_tree { my $con_no = 0; my $cons; - foreach my $td (@{$full_tree->findnodes('//table//table/tr/td')}) { + my $xp_td = XML::LibXML::XPathExpression->new('//table//table/tr/td'); + my $xp_img = XML::LibXML::XPathExpression->new('./img'); + + foreach my $td (@{$full_tree->findnodes($xp_td)}) { - my $colspan = $td->attr('colspan') // 0; - my $class = $td->attr('class') // q{}; + my $colspan = 
$td->getAttribute('colspan') // 0; + my $class = $td->getAttribute('class') // q{}; - # Putting these into the XPath expression would lead to noticable (1 - # to 2 seconds) performance penalties if ( $colspan != 8 and $class !~ /^bgColor2?$/ ) { next; } if ($colspan == 8) { - if ($td->as_trimmed_text() =~ / (?<no> \d+ ) \. .+ Fahrt /x) { + if ($td->textContent() =~ / (?<no> \d+ ) \. .+ Fahrt /x) { $con_no = $+{'no'} - 1; $con_part = 0; next; @@ -226,8 +237,8 @@ sub parse_tree { } } - if (not @{$td->findnodes('./img')} and $td->as_text() !~ /^\s*$/) { - push(@{$cons->[$con_no]->[$con_part]}, $td->as_trimmed_text()); + if (not @{$td->findnodes($xp_img)} and $td->textContent() !~ /^\s*$/) { + push(@{$cons->[$con_no]->[$con_part]}, $td->textContent()); } } return $cons; @@ -326,9 +337,7 @@ if ($test_dump) { exit 0 } -my $tree = HTML::TreeBuilder::LibXML->new(); -$tree->parse($content); -$tree->eof(); +my $tree = XML::LibXML->load_html(string => $content); check_ambiguous($tree); -- cgit v1.2.3