From 5e967451211ecee9b7810311acdc96cafdfce228 Mon Sep 17 00:00:00 2001
From: Daniel Friesel <derf@finalrewind.org>
Date: Fri, 6 Nov 2020 11:23:22 +0100
Subject: HAFAS: Distinguish between disguised HTTP 404 and invalid XML

---
 lib/Travelynx/Helper/HAFAS.pm | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Travelynx/Helper/HAFAS.pm b/lib/Travelynx/Helper/HAFAS.pm
index 6aefcf1..bee4cba 100644
--- a/lib/Travelynx/Helper/HAFAS.pm
+++ b/lib/Travelynx/Helper/HAFAS.pm
@@ -197,10 +197,20 @@ sub get_xml_p {
 			$body
 			  =~ s{<Attribute([^>]+)text="([^"]*)"([^"=]*)""}{<Attribute$1text="$2&#042;$3&#042;"}s;
 			eval { $tree = XML::LibXML->load_xml( string => $body ) };
-			if ($@) {
-				$self->{log}->info("load_xml($url): $@");
+			if ( my $err = $@ ) {
+				if ( $err =~ m{extra content at the end}i ) {
+
+					# We requested XML, but received an HTML error page
+					# (which was returned with HTTP 200 OK).
+					$self->{log}->debug("load_xml($url): $err");
+				}
+				else {
+					# There is invalid XML which we might be able to fix via
+					# regular expressions, so dump it into the production log.
+					$self->{log}->info("load_xml($url): $err");
+				}
 				$cache->freeze( $url, $traininfo );
-				$promise->resolve($traininfo);
+				$promise->reject("hafas->get_xml_p($url): $err");
 				return;
 			}
 
-- 
cgit v1.2.3