From 19b8380caa2ffa54f32c67d81197ced5d79cc3c6 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 21 Dec 2020 13:48:15 +0100 Subject: improve (in)valid date parser --- bin/db-zugbildung-to-json | 60 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index 759f2f7..722c618 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -90,12 +90,12 @@ sub parse_dates { } if ( $mode eq 'range' - and $date =~ m{ ^ ab \s* (?<day> \d{2} ) \. (?<month> [^.]* ) }x ) + and $date =~ m{ ^ ab \s* (?<day> \d{2} ) \. (?<month> [IVX]* ) }x ) { push( @valid_ranges, [ [ $+{month}, $+{day} ], undef ] ); } elsif ( $mode eq 'range' - and $date =~ m{ ^ bis \s* (?<day> \d{2} ) \. (?<month> [^.]* ) }x ) + and $date =~ m{ ^ bis \s* (?<day> \d{2} ) \. (?<month> [IVX]* ) }x ) { push( @valid_ranges, [ undef, [ $+{month}, $+{day} ] ] ); } @@ -105,6 +105,16 @@ sub parse_dates { { push( @valid_ranges, [ [ $+{fm}, $+{fd} ], [ $+{tm}, $+{td} ] ] ); } + elsif ( $mode eq 'extra' + and $date =~ m{ (?<day> \d{2} ) \. (?<month> [IVX]* ) }x ) + { + push( @valid_dates, [ $+{month}, $+{day} ] ); + } + elsif ( $mode eq 'except' + and $date =~ m{ (?<day> \d{2} ) \. (?<month> [IVX]* ) }x ) + { + push( @invalid_dates, [ $+{month}, $+{day} ] ); + } } my %ret = ( @@ -128,6 +138,46 @@ sub parse_dates { push( @{ $ret{valid} }, "${from_date}/${through_date}" ); } + # for several dates of the same month, only the last date has the month set + # (e.g. "24., 31.XII."). Walk through the list in reverse to ensure the + # month is always available. 
+ my $month = undef; + for my $date ( reverse @valid_dates ) { + if ( not $date->[0] and not $month ) { + say STDERR "Skipping unhandled valid date in \"$text\""; + next; + } + + # TODO Winterfahrplan: Use $year-1 for month XII + push( + @{ $ret{valid} }, + sprintf( '%04d-%02d-%02d', + $year, $month{ $date->[0] || $month }, + $date->[1] ) + ); + if ( $date->[0] ) { + $month = $date->[0]; + } + } + $month = undef; + for my $date ( reverse @invalid_dates ) { + if ( not $date->[0] and not $month ) { + say STDERR "Skipping unhandled invalid date in \"$text\""; + next; + } + + # TODO Winterfahrplan: Use $year-1 for month XII + push( + @{ $ret{invalid} }, + sprintf( '%04d-%02d-%02d', + $year, $month{ $date->[0] || $month }, + $date->[1] ) + ); + if ( $date->[0] ) { + $month = $date->[0]; + } + } + return %ret; } @@ -571,6 +621,12 @@ for my $train_number ( keys %map ) { } } +# Cleanup + +#for my $train (values %map) { +# delete $train->{wagonorder_notes}; +#} + # broken umlauf (indentation changes on page break) delete $map{104}{cycle}; delete $map{1588}{cycle}; -- cgit v1.2.3