From 985001e8b4fff4b2c7dcf3ebd8d26a178e1cf561 Mon Sep 17 00:00:00 2001
From: Daniel Friesel
Date: Mon, 11 Jan 2021 22:09:32 +0100
Subject: filter train duplicates by valid date range

---
Review notes (not part of the commit message):

* This copy was re-flowed from a whitespace-collapsed rendering of the
  patch.  Leading indentation, column alignment and the position of blank
  context lines are reconstructed and may not match the repository byte
  for byte.
* range_is_today() now spells its range check as two comparisons joined by
  "and".  The chained form (a < b < c) needs perl 5.32, while the script
  only declares "use 5.020".  Comments were added to range_is_today() and
  to the %smap loop; hunk headers and the diffstat below account for the
  extra lines.
* NOTE(review): the "(? ... )" groups in the parse_condition regex look
  like named captures whose <name> part was lost in extraction (the code
  reads $+{from}).  They are reproduced as found -- restore them from the
  repository before applying.

 bin/db-zugbildung-to-json | 92 ++++++++++++++++++++++++++++++++++++++++++++---
 bin/umlauf-to-dot         | 60 ++++++++++++------------------
 schema.yml                |  7 ++++-
 3 files changed, 117 insertions(+), 42 deletions(-)

diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json
index 4721d31..6159a0e 100755
--- a/bin/db-zugbildung-to-json
+++ b/bin/db-zugbildung-to-json
@@ -8,6 +8,8 @@ use warnings;
 use 5.020;
 use utf8;
 
+use DateTime;
+use DateTime::Format::Strptime;
 use Encode qw(decode);
 use File::Slurp qw(write_file);
 use IPC::Run3;
@@ -25,6 +27,46 @@ sub show_usage {
     exit $exit_code;
 }
 
+my $strp = DateTime::Format::Strptime->new(
+    pattern   => '%F',
+    time_zone => 'Europe/Berlin'
+);
+my $now = DateTime->now( time_zone => 'Europe/Berlin' );
+
+# Return 1 if the current time ($now) falls into $range, and nothing (bare
+# return) otherwise.  $range is either a single date or an interval written
+# as "start/end"; each date is parsed with $strp ('%F', Europe/Berlin).
+# A date for which parse_datetime returns a false value never matches.
+sub range_is_today {
+    my ($range) = @_;
+
+    if ( $range =~ m{^(.*)/(.*)$} ) {
+        my $dt1 = $strp->parse_datetime($1);
+        my $dt2 = $strp->parse_datetime($2);
+
+        # Two explicit comparisons instead of "a < b < c": chained
+        # comparisons need perl 5.32, but this script only requires 5.020.
+        # NOTE(review): both bounds are exclusive; a date-only end value
+        # presumably parses to 00:00, so the end day itself would not
+        # match -- confirm that this is intended.
+        if (    $dt1
+            and $dt2
+            and $dt1->epoch < $now->epoch
+            and $now->epoch < $dt2->epoch )
+        {
+            return 1;
+        }
+    }
+    else {
+        if ( my $dt = $strp->parse_datetime($range) ) {
+            if ( $dt->ymd eq $now->ymd ) {
+                return 1;
+            }
+        }
+    }
+    return;
+}
+
 my @weekdays = (qw(Mo Di Mi Do Fr Sa So));
 my @months   = (qw(0 I II III IV V VI VII VIII IX X XI XII));
 my %weekday  = map { ( $weekdays[$_] => $_ ) } ( 0 .. $#weekdays );
@@ -193,7 +235,7 @@ sub parse_condition {
     my %ret;
 
     if ( $line
-        =~ m{ ^ \s* (? .*? ) \s - \s (? [^,]+ ) , \s* (? (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (? .* ) $ }x
+        =~ m{ ^ \s* (? .*? ) \s - \s (? [^,]+ ) , \s* (? N \s* )? (? (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (? .* ) $ }x
       )
     {
         $ret{from} = $+{from};
@@ -726,12 +768,52 @@ for my $bork (qw(104 1588 1700 77689 78112 939 2310)) {
     }
 }
 
+# Pick at most one train object per train number for the "train" output:
+# the only variant if there is just one, otherwise the single variant that
+# range_is_today() considers valid.  Train numbers with zero or several
+# valid variants get no %smap entry; all variants stay available in %map.
+my %smap;
+for my $train_no ( keys %map ) {
+    if ( @{ $map{$train_no} } == 1 ) {
+        $smap{$train_no} = $map{$train_no}[0];
+    }
+    else {
+        my $latest_valid;
+        my $valid_count = 0;
+        for my $train ( @{ $map{$train_no} } ) {
+            my $is_valid = 0;
+            for my $schedule ( @{ $train->{schedules} // [] } ) {
+                for my $valid ( @{ $schedule->{valid} // [] } ) {
+                    if ( range_is_today($valid) ) {
+                        $is_valid = 1;
+                    }
+                }
+
+                # invalid may override valid for certain days
+                for my $invalid ( @{ $schedule->{invalid} // [] } ) {
+                    if ( range_is_today($invalid) ) {
+                        $is_valid = 0;
+                    }
+                }
+            }
+            if ($is_valid) {
+                $latest_valid = $train;
+                $valid_count++;
+            }
+        }
+        if ( $valid_count == 1 ) {
+            $smap{$train_no} = $latest_valid;
+        }
+    }
+}
+
 # use canonical output (i.e., sort hash keys) to allow for easy diffing.
 say JSON->new->utf8->canonical->encode(
     {
-        deprecated => \0,
-        source     => $wr_name,
-        train      => {%map},
-        valid      => $valid,
+        deprecated     => \0,
+        source         => $wr_name,
+        train          => {%smap},
+        train_variants => {%map},
+        valid          => $valid,
     }
 );
diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot
index 860c962..47a766c 100755
--- a/bin/umlauf-to-dot
+++ b/bin/umlauf-to-dot
@@ -67,11 +67,10 @@ sub build_cycle {
 
         my @candidates;
 
-        for my $train ( @{ $map->{$train_number} } ) {
-            if ( my $c = $train->{cycle}{$cycle_id} ) {
-                push( @candidates, @{ $c->{from} // [] } );
-                push( @candidates, @{ $c->{to}   // [] } );
-            }
+        my $train = $map->{$train_number};
+        if ( my $c = $train->{cycle}{$cycle_id} ) {
+            push( @candidates, @{ $c->{from} // [] } );
+            push( @candidates, @{ $c->{to}   // [] } );
         }
 
         @candidates = uniq @candidates;
@@ -89,39 +88,30 @@ sub build_cycle {
     my @output;
 
     for my $train_number (@train_numbers) {
-        for my $train ( @{ $map->{$train_number} } ) {
-            if ( my $c = $train->{cycle}{$cycle_id} ) {
-                for my $from ( @{ $c->{from} // [] } ) {
-                    push(
-                        @output,
-                        sprintf(
-                            "%s -> %s;",
-                            format_train( $from, $map->{$from}[0] ),
-                            format_train(
-                                $train_number, $map->{$train_number}[0]
-                            )
-                        )
-                    );
-                }
-                for my $to ( @{ $c->{to} // [] } ) {
-                    push(
-                        @output,
-                        sprintf(
-                            "%s -> %s;",
-                            format_train(
-                                $train_number, $map->{$train_number}[0]
-                            ),
-                            format_train( $to, $map->{$to}[0] )
-                        )
-                    );
-                }
+        my $train = $map->{$train_number};
+        if ( my $c = $train->{cycle}{$cycle_id} ) {
+            for my $from ( @{ $c->{from} // [] } ) {
+                push(
+                    @output,
+                    sprintf( "%s -> %s;",
+                        format_train( $from, $map->{$from} ),
+                        format_train( $train_number, $map->{$train_number} ) )
+                );
+            }
+            for my $to ( @{ $c->{to} // [] } ) {
+                push(
+                    @output,
+                    sprintf( "%s -> %s;",
+                        format_train( $train_number, $map->{$train_number} ),
+                        format_train( $to, $map->{$to} ) )
+                );
             }
         }
         if ( $train_number != $line ) {
             push(
                 @output,
                 sprintf( "%s [shape=box];",
-                    format_train( $train_number, $map->{$train_number}[0] ) )
+                    format_train( $train_number, $map->{$train_number} ) )
             );
         }
     }
@@ -131,9 +121,7 @@ sub build_cycle {
 
 my @cycle_ids;
 
-for my $train ( @{ $map->{$line} } ) {
-    push( @cycle_ids, keys %{ $train->{cycle} // {} } );
-}
+push( @cycle_ids, keys %{ $map->{$line}{cycle} // {} } );
 
 say "digraph Umlauf {";
 
@@ -141,6 +129,6 @@ for my $cycle_id (@cycle_ids) {
     say join( "\n", uniq build_cycle( $line, $cycle_id ) );
 }
 
-printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) );
+printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) );
 
 say "}"
diff --git a/schema.yml b/schema.yml
index 0cbc85d..b7542f9 100644
--- a/schema.yml
+++ b/schema.yml
@@ -38,7 +38,12 @@ components:
           description: ISO 8601 interval describing when this train composition dataset is valid
         train:
           type: object
-          description: dict mapping train numbers to train objects
+          description: dict mapping train numbers to probably valid train objects
+          additionalProperties:
+            $ref: '#/components/schemas/train'
+        train_variants:
+          type: object
+          description: dict mapping train numbers to list of possible train objects
           additionalProperties:
             type: array
             items:
-- 
cgit v1.2.3