From 5f1502c0c280860719cae3f361dc15551c73c019 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Fri, 15 May 2015 12:46:53 +0200 Subject: Update Stations.pm and acromys.{pl,sh}. Fix 9 code/name combos. --- Changelog | 3 ++ lib/Travel/Status/DE/IRIS/Stations.pm | 23 ++++++---- scripts/acronyms.pl | 85 ++++++++++++++++++++++++++++++----- scripts/acronyms.sh | 2 +- 4 files changed, 91 insertions(+), 22 deletions(-) diff --git a/Changelog b/Changelog index 0b88685..89ab9d3 100644 --- a/Changelog +++ b/Changelog @@ -2,6 +2,9 @@ git HEAD * IRIS->new: Fix default lwp_options value (was documented, but not used) * Result->route_interesting: Also consider airports + * Stations: Update DS100 code list. Fixes 9 stations in Köln, Düsseldorf + and Wuppertal which had a wrong code/name combination assigned due to + a parser error Travel::Status::DE::IRIS 1.00 - Fri May 01 2015 diff --git a/lib/Travel/Status/DE/IRIS/Stations.pm b/lib/Travel/Status/DE/IRIS/Stations.pm index 68c42c3..4696129 100644 --- a/lib/Travel/Status/DE/IRIS/Stations.pm +++ b/lib/Travel/Status/DE/IRIS/Stations.pm @@ -1206,6 +1206,7 @@ my @stations = ( [ 'BBST', 'Bestensee' ], [ 'EBWG', 'Bestwig' ], [ 'DNOB', 'Betonwerk Oeton' ], + [ 'FKBH1', 'Betrh Sandh Str' ], [ 'XLXBB', 'Bettembg Gr FR' ], [ 'XLB', 'Bettembourg' ], [ 'NBMS', 'Bettmannsäge' ], @@ -2641,16 +2642,16 @@ my @stations = ( [ 'XPCWA', 'Czerwonka' ], [ 'XPCS', 'Czestoch Stradom' ], [ 'XPC', 'Czestochowa Osob' ], - [ 'KDA D', 'Abstellbf' ], + [ 'KDA', 'D Abstellbf' ], [ 'KDFF', 'D Flughafen' ], [ 'KDFFH', 'D Flughafen Hp' ], - [ 'KDF D', 'Flughafen Term' ], + [ 'KDF', 'D Flughafen Term' ], [ 'KDF A', 'D Flughf W 884' ], - [ 'KDH D', 'Hafen' ], + [ 'KDH', 'D Hafen' ], [ 'KDU K', 'D Karthäuser Weg' ], [ 'KDVS', 'D Völklinger Str' ], - [ 'KDV D', 'Volksgarten' ], - [ 'KDW D', 'Wehrhahn' ], + [ 'KDV', 'D Volksgarten' ], + [ 'KDW', 'D Wehrhahn' ], [ 'FDAL', 'Da Lichtwiese' ], [ 'EDAD', 'Daaden' ], [ 'FDA', 'Da-Arheilgen' ], @@ -4758,6 +4759,7 @@ my 
@stations = ( [ 'SGB', 'Göttelborn' ], [ 'DGOE', 'Göttengrün-Gef' ], [ 'RGH', 'Gottenheim' ], + [ 'RGHG', 'DB/SWEG' ], [ 'NGZ', 'Gotteszell' ], [ 'HG', 'Göttingen' ], [ 'HG G', 'Göttingen Gbf' ], @@ -6431,7 +6433,7 @@ my @stations = ( [ 'XRJU', 'Jurdani' ], [ 'XFJU', 'Juvisy' ], [ 'KKBP', 'K Barbarossapl' ], - [ 'KBP K', 'Businesspark' ], + [ 'KBP', 'K Businesspark' ], [ 'KKE N', 'K Eifelt Bez II' ], [ 'KKE M', 'K Eifelt Bez III' ], [ 'KKE S', 'K Eifelt Bez IV' ], @@ -6439,7 +6441,7 @@ my @stations = ( [ 'KKE F', 'K Eifeltor Emf' ], [ 'KKE B', 'K Eifeltor Enf' ], [ 'KKE K', 'K Eifeltor Esf' ], - [ 'KKP K', 'Geldernstr/Pa' ], + [ 'KKP', 'K Geldernstr/Pa' ], [ 'KKHRW', 'K Hansar Wendean' ], [ 'KKLP', 'K Klettenbergp' ], [ 'AKSM', 'K Seefischmarkt' ], @@ -9434,6 +9436,7 @@ my @stations = ( [ 'TNST', 'Neuenstadt/Koch.' ], [ 'TNN', 'Neuenstein' ], [ 'KNEW', 'Neuer Weg' ], + [ 'KQNE', 'NRW/RP' ], [ 'MNF', 'Neufahrn b Frei' ], [ 'MNFR', 'Neufahrn/Nbay' ], [ 'XBNC', 'Neufchateau' ], @@ -14172,8 +14175,8 @@ my @stations = ( [ 'XCVY', 'Vyrica' ], [ 'XTVP', 'Vysoka Pec' ], [ 'HWRG', 'W Böhme DB-Gr' ], - [ 'KWR W', 'Rauenthal' ], - [ 'KWZ W', 'Zool Garten' ], + [ 'KWR', 'W Rauenthal' ], + [ 'KWZ', 'W Zool Garten' ], [ 'EWFR', 'Wa Unser Fritz' ], [ 'FWAB', 'Wabern (Bz Ksl)' ], [ 'HWAD', 'Wachendorf' ], @@ -14719,6 +14722,7 @@ my @stations = ( [ 'SWIN', 'Wincheringen' ], [ 'EWIL', 'Windelsbleiche' ], [ 'RWND', 'Winden (Pfalz)' ], + [ 'FQWH', 'NRW/RP' ], [ 'HWDH', 'Windheim (Weser)' ], [ 'NWB', 'Windischeschenb' ], [ 'XAWIG', 'Windischgarsten' ], @@ -14894,6 +14898,7 @@ my @stations = ( [ 'UWFN', 'Wünschendorf N' ], [ 'BWUE', 'Wünsdorf-Waldst' ], [ 'NWR A', 'Wür Rbf Ausfahrt' ], + [ 'HQWS', 'NRW/HE' ], [ 'EWGD', 'Würgendorf' ], [ 'EWGT', 'Würgendorf Ort' ], [ 'NWHO', 'Wür-Heidingsf O' ], diff --git a/scripts/acronyms.pl b/scripts/acronyms.pl index d2326e9..6dfba4b 100755 --- a/scripts/acronyms.pl +++ b/scripts/acronyms.pl @@ -3,14 +3,9 @@ use strict; use warnings; use 5.010; - 
-my $re_line = qr{ - ^ - (?<acronym> [A-Z]{2}[A-Z ]{0,3} ) - \s - (?<name> .+) - $ -}x; +use Encode qw(decode encode); +use List::Util qw(max sum); +use List::MoreUtils qw(true); say <<'EOF'; package Travel::Status::DE::IRIS::Stations; @@ -27,15 +22,81 @@ our $VERSION = '1.00'; my @stations = ( EOF +my @buf; + +sub process_block { + my @histogram; + my @borders = (0); + my $run = 0; + + my $length = max (map { length($_) } @buf); + + for my $i (0 .. $length) { + $histogram[$i] = true { length($_) < $i or substr($_, $i, 1) eq q{ } } @buf; + + if ($histogram[$i] == @buf) { + if (not $run) { + push(@borders, $i); + $run = 1; + } + } + else { + $run = 0; + } + } + for my $i (0 .. $#borders / 2) { + for my $line (@buf) { + my $station_offset = $borders[2 * $i]; + my $name_offset = $borders[2 * $i + 1]; + my $station_length = $name_offset - $station_offset; + my $name_length = $borders[2 * $i + 2] ? ($borders[2 * $i + 2] - $name_offset) : undef; + + if (length($line) < $station_offset) { + next; + } + + my $station = substr($line, $station_offset, $station_length); + my $name = $name_length ?
substr($line, $name_offset, $name_length) : substr($line, $name_offset); + + $station =~ s{^\s+}{}; + $station =~ s{\s+$}{}; + $station =~ s{\s+}{ }g; + $name =~ s{!}{ }g; + $name =~ s{^\s+}{}; + $name =~ s{\s+$}{}; + $name =~ s{\s+}{ }g; + $name =~ s{'}{\\'}g; + + if (length($station) == 0) { + next; + } + + printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name)); + } + } +} + while (my $line = <STDIN>) { chomp $line; + $line = decode('UTF-8', $line); - if ($line =~ $re_line) { - my ($station, $name) = @+{qw{acronym name}}; - $name =~ s{'}{\\'}g; + if (length($line) == 0 and @buf) { + process_block(); + @buf = (); + } - printf("\t['%s','%s'],\n", $station, $name); + if ($line !~ m{ ^ [A-Z]{2} }x and $line !~ m{ \s [A-Z]{2,5} \s }x) { + next; } + + $line =~ s{RB-Gr km}{RB-Gr!km}g; + $line =~ s{RB-Gr!km\s++}{RB-Gr!km!}g; + $line =~ s{Bad }{Bad!}g; + + push(@buf, $line); +} +if (@buf) { + process_block(); } say <<'EOF'; diff --git a/scripts/acronyms.sh b/scripts/acronyms.sh index 95d2ea7..71f90d1 100755 --- a/scripts/acronyms.sh +++ b/scripts/acronyms.sh @@ -1,5 +1,5 @@ #!/bin/sh curl -s http://fahrweg.dbnetze.com/file/fahrweg-de/2394144/vHBDX5OndmGwv-JTA9EzuNArX1E/2361656/data/betriebsstellen.pdf \ -| pdftotext -raw - - | perl scripts/acronyms.pl \ +| pdftotext -layout - - | perl scripts/acronyms.pl \ > lib/Travel/Status/DE/IRIS/Stations.pm -- cgit v1.2.3