#!/usr/bin/env perl use strict; use warnings; use 5.010; use Encode qw(encode); use Text::CSV; say <<'EOF'; package Travel::Status::DE::IRIS::Stations; use strict; use warnings; use 5.014; use utf8; use List::Util qw(min); use List::UtilsBy qw(uniq_by); use List::MoreUtils qw(firstval pairwise); use Text::LevenshteinXS qw(distance); # TODO switch to Text::Levenshtein::XS once AUR/Debian packages become available our $VERSION = '1.05'; my @stations = ( EOF my $csv = Text::CSV->new({binary => 1, sep_char => q{;}}); while (my $line = ) { # chomp $line; # $line = decode('UTF-8', $line); my $status = $csv->parse($line); my @fields = $csv->fields; if ($fields[0] eq 'Abk') { next; } my ($station, $name, $country, $location, $valid_since) = @fields; $name =~ s{!}{ }g; $name =~ s{^\s+}{}; $name =~ s{\s+$}{}; $name =~ s{\s+}{ }g; $name =~ s{'}{\\'}g; printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name)); } say <<'EOF'; ); sub get_stations { return @stations; } sub normalize { my ($val) = @_; $val =~ s{Ä}{Ae}g; $val =~ s{Ö}{Oe}g; $val =~ s{Ü}{Ue}g; $val =~ s{ä}{ae}g; $val =~ s{ö}{oe}g; $val =~ s{ß}{sz}g; $val =~ s{ü}{ue}g; return $val; } sub get_station { my ($name) = @_; my $ds100_match = firstval { $name eq $_->[0] } @stations; if ($ds100_match) { return ($ds100_match); } return get_station_by_name($name); } sub get_station_by_name { my ($name) = @_; my $nname = lc($name); my $actual_match = firstval { $nname eq lc( $_->[1] ) } @stations; if ($actual_match) { return ($actual_match); } $nname = normalize($nname); $actual_match = firstval { $nname eq normalize( lc( $_->[1] ) ) } @stations; if ($actual_match) { return ($actual_match); } my @distances = map { distance( $nname, $_->[1] ) } @stations; my $min_dist = min(@distances); my @station_map = pairwise { [ $a, $b ] } @stations, @distances; my @substring_matches = grep { $_->[1] =~ m{$name}i } @stations; my @levenshtein_matches = map { $_->[0] } grep { $_->[1] == $min_dist } @station_map; return uniq_by { $_->[0] } ( @substring_matches, @levenshtein_matches ); } 1; __END__ =head1 NAME Travel::Status::DE::IRIS::Stations - Station name to station code mapping =head1 SYNOPSIS use Travel::Status::DE::IRIS::Stations; my $name = 'Essen Hbf'; my @stations = Travel::Status::DE::IRIS::Stations::get_station_by_name( $name); if (@stations < 1) { # no matching stations } elsif (@stations > 1) { # too many matches } else { printf("Input '%s' matched station code %s (as '%s')\n", $name, @{$stations[0]}); } =head1 VERSION version 0.00 =head1 DESCRIPTION This module contains a mapping of DeutscheBahn station names to station codes. A station name is a (perhaps slightly abbreviated) string naming a particular station; a station code is a two to five character denoting a station for the IRIS web service. Example station names (code in parentheses) are: "Essen HBf" (EE), "Aachen Schanz" (KASZ), "Do UniversitEt" (EDUV). B Station codes may contain whitespace. =head1 METHODS =over =item Travel::Status::DE::IRIS::get_stations Returns a list of [station code, station name] listrefs lexically sorted by station name. =item Travel::Status::DE::IRIS::get_station(I<$in>) Returns a list of [station code, station name] listrefs matching I<$in>. If a I<$in> is a valid station code, only one element ([I<$in>, related name]) is returned. Otherwise, it is passed to get_station_by_name(I<$in>) (see below). Note that station codes matching is case sensitive and must be exact. =item Travel::Status::DE::IRIS::get_station_by_name(I<$name>) Returns a list of [station code, station name] listrefs where the station name matches I<$name>. Matching happens in two steps: If a case-insensitive exact match exists, only this one is returned. Otherwise, all stations whose name contains I<$name> as a substring (also case-insensitive) are returned. This two-step behaviour makes sure that not prefix-free stations can still be matched directly. For instance, both "Essen-Steele" and "Essen-Steele Ost" are valid station names, but "essen-steele" will only return "Essen-Steele". =back =head1 DIAGNOSTICS None. =head1 DEPENDENCIES =over =item * List::MoreUtils(3pm) =item * List::Util(3pm) =item * Text::LevenshteinXS(3pm) =back =head1 BUGS AND LIMITATIONS There is no support for intelligent whitespaces (to also match "-" and similar) yet. =head1 SEE ALSO Travel::Status::DE::IRIS(3pm). =head1 AUTHOR Copyright (C) 2014-2015 by Daniel Friesel Ederf@finalrewind.orgE =head1 LICENSE This module is licensed under the same terms as Perl itself. EOF