diff options
| | | |
|---|---|---|
| author | Daniel Friesel <derf@finalrewind.org> | 2020-11-06 11:23:22 +0100 |
| committer | Daniel Friesel <derf@finalrewind.org> | 2020-11-06 11:23:22 +0100 |
| commit | 5e967451211ecee9b7810311acdc96cafdfce228 (patch) | |
| tree | 4cb29af3d10748d312442183935f72f225dcd83d /lib/Travelynx | |
| parent | c70280a93562e34ad3a711e23d138a96aca5ad58 (diff) | |
HAFAS: Distinguish between disguised HTTP 404 and invalid XML
Diffstat (limited to 'lib/Travelynx')
-rw-r--r-- | lib/Travelynx/Helper/HAFAS.pm | 16 |
1 files changed, 13 insertions, 3 deletions
```diff
diff --git a/lib/Travelynx/Helper/HAFAS.pm b/lib/Travelynx/Helper/HAFAS.pm
index 6aefcf1..bee4cba 100644
--- a/lib/Travelynx/Helper/HAFAS.pm
+++ b/lib/Travelynx/Helper/HAFAS.pm
@@ -197,10 +197,20 @@ sub get_xml_p {
                 $body
                   =~ s{<Attribute([^>]+)text="([^"]*)"([^"=]*)""}{<Attribute$1text="$2*$3*"}s;
 
                 eval { $tree = XML::LibXML->load_xml( string => $body ) };
-                if ($@) {
-                    $self->{log}->info("load_xml($url): $@");
+                if ( my $err = $@ ) {
+                    if ( $err =~ m{extra content at the end}i ) {
+
+                        # We requested XML, but received an HTML error page
+                        # (which was returned with HTTP 200 OK).
+                        $self->{log}->debug("load_xml($url): $err");
+                    }
+                    else {
+                        # There is invalid XML which we might be able to fix via
+                        # regular expressions, so dump it into the production log.
+                        $self->{log}->info("load_xml($url): $err");
+                    }
                     $cache->freeze( $url, $traininfo );
-                    $promise->resolve($traininfo);
+                    $promise->reject("hafas->get_xml_p($url): $err");
                     return;
                 }
```