From 8a07867b705e81db248e6fab4f75e9d2b0315f1e Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Tue, 24 Dec 2013 16:10:52 +0100 Subject: Remove fuzzy matching from API, add get_stop_by_name instead --- Changelog | 4 +++- bin/aseag-m | 24 +++++++++++++++++++ lib/Travel/Status/DE/URA.pm | 46 +++++++++++++++++------------------ t/20-aseag.t | 58 ++++++++++++++++----------------------------- 4 files changed, 70 insertions(+), 62 deletions(-) diff --git a/Changelog b/Changelog index 32e9202..9471a62 100644 --- a/Changelog +++ b/Changelog @@ -1,6 +1,8 @@ git HEAD - * Fix spaces in stop names with fuzzy => 1 + * Remove fuzzy matching from constructor / ->results API. Use + the new get_stop_by_name function instead (returns all matching stop). + Update aseag-m accordingly Travel::Status::DE::URA 0.01 - Sun Dec 22 2013 diff --git a/bin/aseag-m b/bin/aseag-m index 52cd946..40f8898 100755 --- a/bin/aseag-m +++ b/bin/aseag-m @@ -94,6 +94,26 @@ sub display_result { return; } +sub get_exact_stop_name { + my ($fuzzy_name) = @_; + + my @stops = $status->get_stop_by_name($fuzzy_name); + + if ( @stops == 0 ) { + say STDERR "No stops match '$fuzzy_name'"; + exit(1); + } + elsif ( @stops == 1 ) { + return $stops[0]; + } + else { + say STDERR "The stop '$fuzzy_name' is ambiguous. Please choose one " + . 'of the following:'; + say STDERR join( "\n", @stops ); + exit(1); + } +} + sub show_results { my @output; @@ -169,6 +189,10 @@ if ( my $err = $status->errstr ) { exit 2; } +$stop_name = get_exact_stop_name($stop_name); +if ($via) { + $via = get_exact_stop_name($via); +} show_results(); __END__ diff --git a/lib/Travel/Status/DE/URA.pm b/lib/Travel/Status/DE/URA.pm index 4683f34..99c0132 100644 --- a/lib/Travel/Status/DE/URA.pm +++ b/lib/Travel/Status/DE/URA.pm @@ -11,7 +11,7 @@ our $VERSION = '0.01'; use Carp qw(confess cluck); use DateTime; use Encode qw(encode decode); -use List::MoreUtils qw(none); +use List::MoreUtils qw(firstval none uniq); use LWP::UserAgent; use Travel::Status::DE::URA::Result; @@ -28,7 +28,6 @@ sub new { ura_base => $opt{ura_base}, ura_version => $opt{ura_version}, full_routes => $opt{full_routes} // 0, - fuzzy => $opt{fuzzy} // 1, hide_past => $opt{hide_past} // 1, stop => $opt{stop}, via => $opt{via}, @@ -67,7 +66,6 @@ sub new_from_raw { ura_base => $opt{ura_base}, ura_version => $opt{ura_version}, full_routes => $opt{full_routes} // 0, - fuzzy => $opt{fuzzy} // 1, hide_past => $opt{hide_past} // 1, stop => $opt{stop}, via => $opt{via}, @@ -90,28 +88,34 @@ sub parse_raw_data { # first field == 4 => version information, no departure if ( substr( $dep, 0, 1 ) != 4 ) { - push( @{ $self->{raw_list} }, [ split( /"?,"?/, $dep ) ] ); + my @fields = split( /"?,"?/, $dep ); + push( @{ $self->{raw_list} }, \@fields ); + push( @{ $self->{stop_names} }, $fields[1] ); } } + @{ $self->{stop_names} } = uniq @{ $self->{stop_names} }; + return $self; } -sub errstr { - my ($self) = @_; +sub get_stop_by_name { + my ( $self, $name ) = @_; - return $self->{errstr}; + my $nname = lc($name); + my $actual_match = firstval { $nname eq lc($_) } @{ $self->{stop_names} }; + + if ($actual_match) { + return $actual_match; + } + + return ( grep { $_ =~ m{$name}i } @{ $self->{stop_names} } ); } -sub is_my_stop { - my ( $self, $stop, $my_stop, $fuzzy ) = @_; +sub errstr { + my ($self) = @_; - if ($fuzzy) { - return ( $stop =~ m{$my_stop}i ? 1 : 0 ); - } - else { - return ( $stop eq $my_stop ); - } + return $self->{errstr}; } sub results { @@ -119,7 +123,6 @@ sub results { my @results; my $full_routes = $opt{full_routes} // $self->{full_routes} // 0; - my $fuzzy = $opt{fuzzy} // $self->{fuzzy} // 1; my $hide_past = $opt{hide_past} // $self->{hide_past} // 1; my $stop = $opt{stop} // $self->{stop}; my $via = $opt{via} // $self->{via}; @@ -139,7 +142,7 @@ sub results { ) = @{$dep}; my @route; - if ( $stop and not $self->is_my_stop( $stopname, $stop, $fuzzy ) ) { + if ( $stop and not( $stopname eq $stop ) ) { next; } @@ -172,7 +175,7 @@ sub results { } if ( $via - and none { $self->is_my_stop( $_->[1], $via, $fuzzy ) } @route ) + and none { $_->[1] eq $via } @route ) { next; } @@ -316,11 +319,6 @@ set. B / B limits the timetable to stops before / after the stop I (if set). -=item B => I (default 1) - -A true value allows fuzzy matching for the stop I, a false one -requires an exact string match. - =item B => I (default 1) Do not include past departures in the result list and the computed timetables. @@ -334,7 +332,7 @@ Only return departures at stop I. Only return departures containing I in their route. If B is set, I must be in the route after the stop I. If, in addition to that, B is set to B, I must be in the route -before the stop I. Respects B. Implies C<< full_routes> => 'after' >> unless +before the stop I. Implies C<< full_routes> => 'after' >> unless B is explicitly set to B / B / 1. =back diff --git a/t/20-aseag.t b/t/20-aseag.t index bbdeb6c..58a324e 100644 --- a/t/20-aseag.t +++ b/t/20-aseag.t @@ -7,7 +7,7 @@ use utf8; use Encode qw(decode); use File::Slurp qw(slurp); use List::Util qw(first); -use Test::More tests => 40; +use Test::More tests => 38; BEGIN { use_ok('Travel::Status::DE::ASEAG'); @@ -37,55 +37,44 @@ is( @results, 16208, 'All departures parsed and returned' ); @results = $s->results( hide_past => 1 ); is( @results, 0, 'hide_past => 1 returns nothing' ); -# fuzzy matching: bushof should return Aachen Bushof and Eschweiler Bushof -# (459 results) +# fuzzy matching: bushof should return Aachen Bushof, Eschweiler Bushof, +# Eupon Bushof -@results = $s->results( stop => 'bushof' ); +my @fuzzy = $s->get_stop_by_name('bushof'); -is( @results, 459, '"bushof" fuzzy-matches 459 stops' ); -ok( - ( first { $_->stop eq 'Aachen Bushof' } @results ), - '"bushof" fuzzy-matches "Aachen Bushof"' -); -ok( - ( first { $_->stop eq 'Eschweiler Bushof' } @results ), - '"bushof" fuzzy-matches "Eschweiler Bushof"' -); -ok( - ( first { $_->stop eq 'Eupen Bushof' } @results ), - '"bushof" fuzzy-matches "Eupen Bushof"' -); -is( - ( - first { - not( $_->stop eq 'Aachen Bushof' - or $_->stop eq 'Eschweiler Bushof' - or $_->stop eq 'Eupen Bushof' ); - } - @results - ), - undef, - '"bushof" does not match anything else' -); +is_deeply(\@fuzzy, ['Aachen Bushof', 'Eschweiler Bushof', 'Eupen Bushof'], +'fuzzy match for "bushof" works'); + +# fuzzy matching: whitespaces work + +@fuzzy = $s->get_stop_by_name('Aachen Bushof'); + +is_deeply(\@fuzzy, ['Aachen Bushof'], +'fuzzy match with exact name "Aachen Bushof" works'); + +# fuzzy matching: exact name only matches one, even though longer alternatives +# exist + +@fuzzy = $s->get_stop_by_name('brand'); + +is_deeply(\@fuzzy, ['Brand'], +'fuzzy match with exact name "brand" works'); # exact matching: bushof should match nothing @results = $s->results( stop => 'bushof', - fuzzy => 0 ); is( @results, 0, '"bushof" matches nothing' ); @results = $s->results( stop => 'aachen bushof', - fuzzy => 0 ); is( @results, 0, 'matching is case-sensitive' ); # exact matching: Aachen Bushof should work @results = $s->results( stop => 'Aachen Bushof', - fuzzy => 0 ); is( @results, 375, '"Aachen Bushof" matches 375 stops' ); @@ -97,11 +86,9 @@ $s = Travel::Status::DE::ASEAG->new_from_raw( raw_str => $rawstr, hide_past => 0, stop => 'Aachen Bushof', - fuzzy => 0 ); @results = $s->results( stop => 'Aachen Bushof', - fuzzy => 0 ); is( @results, 375, '"Aachen Bushof" matches 375 stops in constructor' ); is( ( first { $_->stop ne 'Aachen Bushof' } @results ), @@ -113,7 +100,6 @@ $s = Travel::Status::DE::ASEAG->new_from_raw( raw_str => $rawstr, hide_past => 0, stop => 'Aachen Bushof', - fuzzy => 0 ); @results = $s->results( via => 'Finkensief' ); @@ -150,7 +136,6 @@ $s = Travel::Status::DE::ASEAG->new_from_raw( raw_str => $rawstr, hide_past => 0, stop => 'Aachen Bushof', - fuzzy => 0 ); @results = $s->results( via => 'Finkensief', @@ -190,7 +175,6 @@ $s = Travel::Status::DE::ASEAG->new_from_raw( raw_str => $rawstr, hide_past => 0, stop => 'Aachen Bushof', - fuzzy => 0 ); @results = $s->results( via => 'Finkensief', -- cgit v1.2.3