diff options
Diffstat (limited to 'lib/Travel/Status/DE/IRIS/Stations.pm.PL')
-rw-r--r-- | lib/Travel/Status/DE/IRIS/Stations.pm.PL | 66 |
1 files changed, 45 insertions, 21 deletions
diff --git a/lib/Travel/Status/DE/IRIS/Stations.pm.PL b/lib/Travel/Status/DE/IRIS/Stations.pm.PL index 3547448..edfcf63 100644 --- a/lib/Travel/Status/DE/IRIS/Stations.pm.PL +++ b/lib/Travel/Status/DE/IRIS/Stations.pm.PL @@ -10,6 +10,9 @@ use JSON; my $json_str = read_file('share/stations.json'); my $stations = JSON->new->utf8->decode($json_str); +my $meta_str = read_file('share/meta.json'); +my $meta = JSON->new->utf8->decode($meta_str); + my $buf = <<'EOF'; package Travel::Status::DE::IRIS::Stations; @@ -23,19 +26,15 @@ use warnings; use 5.014; use utf8; -use Geo::Distance; +use GIS::Distance; use List::Util qw(min); use List::UtilsBy qw(uniq_by); use List::MoreUtils qw(firstval pairwise); use Text::LevenshteinXS qw(distance); -# TODO Geo::Distance is kinda deprecated, it is recommended to use GIS::Distance -# instead. However, since GIS::Distance is not packaged for Debian, I'll stick -# with Geo::Distance for now (which works fine enough here) - # TODO switch to Text::Levenshtein::XS once AUR/Debian packages become available -our $VERSION = '1.51'; +our $VERSION = '1.96'; # Automatically generated, see share/stations.json my @stations = ( @@ -59,10 +58,25 @@ for my $station ( @{$stations} ) { $buf .= <<'EOF'; ); +# Automatically generated, see share/meta.json +my $meta = { +EOF + +for my $eva ( keys %{$meta} ) { + $buf .= sprintf( "%s => [%s],\n", $eva, join( q{,}, @{ $meta->{$eva} } ) ); +} + +$buf .= <<'EOF'; +}; + sub get_stations { return @stations; } +sub get_meta { + return $meta; +} + sub normalize { my ($val) = @_; @@ -80,6 +94,10 @@ sub normalize { sub get_station { my ($name) = @_; + if (not $name) { + return; + } + my $ds100_match = firstval { $name eq $_->[0] } @stations; if ($ds100_match) { @@ -100,7 +118,7 @@ sub get_station_by_location { $num_matches //= 10; - my $geo = Geo::Distance->new(); + my $dist = GIS::Distance->new(); # we only use geolocations inside germany. # For these, this fast preprocessing step will let through all @@ -111,9 +129,8 @@ sub get_station_by_location { and abs( $_->[4] - $lat ) < 1 } @stations; - my @distances - = map { $geo->distance( 'kilometer', $lon, $lat, $_->[3], $_->[4] ) } - @candidates; + my @distances = map + { $dist->distance_metal( $lat, $lon, $_->[4], $_->[3] ) } @candidates; my @station_map = pairwise { [ $a, $b ] } @candidates, @distances; @station_map = sort { $a->[1] <=> $b->[1] } @station_map; @@ -125,6 +142,10 @@ sub get_station_by_location { sub get_station_by_name { my ($name) = @_; + if (not $name) { + return; + } + my $nname = lc($name); my $actual_match = firstval { $nname eq lc( $_->[1] ) } @stations; @@ -183,7 +204,7 @@ Travel::Status::DE::IRIS::Stations - Station name to station code mapping =head1 VERSION -version 1.51 +version 1.96 =head1 DESCRIPTION @@ -212,6 +233,10 @@ that it may contain space characters. =back +Note that station names are not unique. +A single station may be present multiple times with different EVA numbers and DS100 codes. +At the moment, EVA numbers and DS100 codes are unique. + =head1 METHODS =over @@ -224,7 +249,7 @@ Returns a list of all known stations, lexically sorted by station name. Returns a list of stations matching I<$in>. -If a I<$in> is a valid station (either DS100 code or EVA number), +If a I<$in> is a valid station identifier (either DS100 code or EVA number), a single array reference describing the station is returned. Otherwise, I<$in> is passed to get_station_by_name(I<$in>) (see below). @@ -237,18 +262,17 @@ returns the closest I<$num_matches> (defaults to 10) matches. Note that stations which are located more than 70 kilometers away from I<$lon>/I<$lat> may be ignored when computing the closest matches. -Note that location-based lookup is only supported for stations inside Germany, -since the station list data source does not provide geolocation data for -non-german stations. - =item Travel::Status::DE::IRIS::Stations::get_station_by_name(I<$name>) Returns a list of stations where the station name matches I<$name>. Matching happens in two steps: If a case-insensitive exact match exists, only -this one is returned. Otherwise, all stations whose name contains I<$name> as -a substring (also case-insensitive) and all stations whose name has a low -Levenshtein distance to I<$name> are returned. +this one is returned. For station names that correspond to several EVA/DS100 +codes, the match with the lowest EVA number is returned. + +Otherwise, all stations whose name contains I<$name> as a substring (also +case-insensitive) and all stations whose name has a low Levenshtein distance to +I<$name> are returned. This two-step behaviour makes sure that not-prefix-free stations can still be matched directly. For instance, both "Essen-Steele" and "Essen-Steele Ost" @@ -264,7 +288,7 @@ None. =over -=item * Geo::Distance(3pm) +=item * GIS::Distance(3pm) =item * List::MoreUtils(3pm) @@ -287,7 +311,7 @@ Travel::Status::DE::IRIS(3pm). Station data: Copyright (C) 2016 by DB Station&Service AG, Europaplatz 1, 10557 Berlin, Germany -Lookup code: Copyright (C) 2014-2020 by Daniel Friesel E<lt>derf@finalrewind.orgE<gt> +Lookup code: Copyright (C) 2014-2024 by Birte Kristina Friesel E<lt>derf@finalrewind.orgE<gt> =head1 LICENSE |