summaryrefslogtreecommitdiff
path: root/scripts/acronyms.pl
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2015-09-14 21:07:10 +0200
committerDaniel Friesel <derf@finalrewind.org>2015-09-14 21:07:10 +0200
commitf06afc08e82be0eccbd3b438470503bedaac69d2 (patch)
tree529b6925334f7e928ce8c7dd451588b70b976c14 /scripts/acronyms.pl
parent3cccdc35bc5b4edcc97d486cf9ed50fc7b2ca82a (diff)
re-enable substring matching -> use both for station lookup
Diffstat (limited to 'scripts/acronyms.pl')
-rwxr-xr-xscripts/acronyms.pl21
1 files changed, 9 insertions, 12 deletions
diff --git a/scripts/acronyms.pl b/scripts/acronyms.pl
index de79b81..6ac2d16 100755
--- a/scripts/acronyms.pl
+++ b/scripts/acronyms.pl
@@ -16,6 +16,7 @@ use 5.014;
use utf8;
use List::Util qw(min);
+use List::UtilsBy qw(uniq_by);
use List::MoreUtils qw(firstval pairwise);
use Text::LevenshteinXS qw(distance);
@@ -125,7 +126,7 @@ sub normalize {
}
sub get_station {
- my ( $name ) = @_;
+ my ($name) = @_;
my $ds100_match = firstval { $name eq $_->[0] } @stations;
@@ -137,34 +138,30 @@ sub get_station {
}
sub get_station_by_name {
- my ( $name ) = @_;
+ my ($name) = @_;
my $nname = lc($name);
- my $actual_match = firstval { $nname eq lc($_->[1]) } @stations;
+ my $actual_match = firstval { $nname eq lc( $_->[1] ) } @stations;
if ($actual_match) {
return ($actual_match);
}
$nname = normalize($nname);
- $actual_match = firstval { $nname eq normalize(lc($_->[1])) } @stations;
+ $actual_match = firstval { $nname eq normalize( lc( $_->[1] ) ) } @stations;
if ($actual_match) {
return ($actual_match);
}
my @distances = map { distance( $nname, $_->[1] ) } @stations;
my $min_dist = min(@distances);
- my $minp1_dist = min( grep { $_ != $min_dist } @distances );
my @station_map = pairwise { [ $a, $b ] } @stations, @distances;
- # arbitrary selection: edit distance < 5 is probably a typo, >= 5
- # probably means the station does not exist / has an odd name
- if ( $min_dist < 5 ) {
- return map { $_->[0] } grep { $_->[1] == $min_dist } @station_map;
- }
+ my @substring_matches = grep { $_->[1] =~ m{$name}i } @stations;
+ my @levenshtein_matches
+ = map { $_->[0] } grep { $_->[1] == $min_dist } @station_map;
- # always return a list when the edit distance is large
- return map { $_->[0] } grep { $_->[1] <= $minp1_dist } @station_map;
+ return uniq_by { $_->[0] } ( @substring_matches, @levenshtein_matches );
}
1;