summaryrefslogtreecommitdiff
path: root/lib/Travel/Status/DE
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2015-09-12 14:03:09 +0200
committerDaniel Friesel <derf@finalrewind.org>2015-09-12 14:03:09 +0200
commit3cccdc35bc5b4edcc97d486cf9ed50fc7b2ca82a (patch)
treedead79a7978971ebe80c84b75d2d4559360d5e14 /lib/Travel/Status/DE
parent4caa67e1f2ca8ec055acf24f04d99da028e5c06f (diff)
use Text::Levenshtein(XS) for fuzzy station name matching
Diffstat (limited to 'lib/Travel/Status/DE')
-rw-r--r--lib/Travel/Status/DE/IRIS/Stations.pm24
1 files changed, 22 insertions, 2 deletions
diff --git a/lib/Travel/Status/DE/IRIS/Stations.pm b/lib/Travel/Status/DE/IRIS/Stations.pm
index 7fdd6cc..2c8b8b4 100644
--- a/lib/Travel/Status/DE/IRIS/Stations.pm
+++ b/lib/Travel/Status/DE/IRIS/Stations.pm
@@ -5,7 +5,11 @@ use warnings;
use 5.014;
use utf8;
-use List::MoreUtils qw(firstval);
+use List::Util qw(min);
+use List::MoreUtils qw(firstval pairwise);
+use Text::LevenshteinXS qw(distance);
+
+# TODO switch to Text::Levenshtein::XS once AUR/Debian packages become available
our $VERSION = '1.02';
@@ -15265,7 +15269,19 @@ sub get_station_by_name {
return ($actual_match);
}
- return ( grep { $_->[1] =~ m{$name}i } @stations );
+ my @distances = map { distance( $nname, $_->[1] ) } @stations;
+ my $min_dist = min(@distances);
+ my $minp1_dist = min( grep { $_ != $min_dist } @distances );
+ my @station_map = pairwise { [ $a, $b ] } @stations, @distances;
+
+ # arbitrary selection: edit distance < 5 is probably a typo, >= 5
+ # probably means the station does not exist / has an odd name
+ if ( $min_dist < 5 ) {
+ return map { $_->[0] } grep { $_->[1] == $min_dist } @station_map;
+ }
+
+ # always return a list when the edit distance is large
+ return map { $_->[0] } grep { $_->[1] <= $minp1_dist } @station_map;
}
1;
@@ -15355,6 +15371,10 @@ None.
=item * List::MoreUtils(3pm)
+=item * List::Util(3pm)
+
+=item * Text::LevenshteinXS(3pm)
+
=back
=head1 BUGS AND LIMITATIONS