From c6270546445c1c3954636a1c92d882d1b4b379bd Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 11 Jan 2021 19:31:35 +0100 Subject: respect cycle IDs when parsing cycle refs and building graphs --- bin/db-zugbildung-to-json | 20 ++++++++----- bin/umlauf-to-dot | 74 ++++++++++++++++++++++++++++++----------------- schema.yml | 20 +++++++------ 3 files changed, 70 insertions(+), 44 deletions(-) diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index d6bd897..b1f804d 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -527,32 +527,36 @@ for my $train ( values %map ) { if ( not $from_offset ) { next; } + my $cycle_id = "unknown"; for my $i_line ( @{ $train->{wagonorder_notes} } ) { # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes. # We don't want that. my $line = substr( $i_line, 0 ); - # Some cycles do not have a "from" entry, but they do have an odd - # F-identifier, which confuses the cycle detection code. remove it. - $line =~ s{F[0-9] [0-9]{3}}{.. ...}; + # Fx abc appears to identify cycle groups. + # It also confuses the cycle detection code for cycles which do not have + # a "from" entry. + if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { + $cycle_id = $1; + } if ( length($line) <= $from_offset ) { next; } my $umlauf = substr( $line, $from_offset ); if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{from} }, $1 ); + push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); } if ( length($line) > $to_offset ) { $umlauf = substr( $line, $to_offset ); if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{to} }, $1 ); + push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); } } } - if ( exists $train->{cycle} ) { - @{ $train->{cycle}{from} } = uniq @{ $train->{cycle}{from} // [] }; - @{ $train->{cycle}{to} } = uniq @{ $train->{cycle}{to} // [] }; + for my $cycle ( values %{ $train->{cycle} // {} } ) { + @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; + @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; } } diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot index 69472d8..1ac2c2e 100755 --- a/bin/umlauf-to-dot +++ b/bin/umlauf-to-dot @@ -52,9 +52,10 @@ my $json = JSON->new->utf8->decode($file_content); my $map = $json->{train}; -my @train_numbers; -if ($line) { +sub build_cycle { + my ( $line, $cycle_id ) = @_; my @queue = ( [ $line, 0 ] ); + my @train_numbers; while (@queue) { my ( $train_number, $distance ) = @{ pop @queue }; @@ -65,8 +66,11 @@ if ($line) { } my @candidates; - push( @candidates, @{ $map->{$train_number}{cycle}{from} // [] } ); - push( @candidates, @{ $map->{$train_number}{cycle}{to} // [] } ); + + if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { + push( @candidates, @{ $c->{from} // [] } ); + push( @candidates, @{ $c->{to} // [] } ); + } @candidates = uniq @candidates; @candidates @@ -79,32 +83,48 @@ if ($line) { push( @queue, map { [ $_, $distance + 1 ] } @candidates ); } -} -else { - @train_numbers = keys %{$map}; -} -my @output; - -for my $train_number (@train_numbers) { - for my $from ( @{ $map->{$train_number}{cycle}{from} } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $from, $map->{$from} ), - format_train( $train_number, $map->{$train_number} ) ) - ); - } - for my $to ( @{ $map->{$train_number}{cycle}{to} } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $train_number, $map->{$train_number} ), - format_train( $to, $map->{$to} ) ) - ); + my @output; + + for my $train_number (@train_numbers) { + if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { + for my $from ( @{ $c->{from} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $from, $map->{$from} ), + format_train( $train_number, $map->{$train_number} ) ) + ); + } + for my $to ( @{ $c->{to} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $train_number, $map->{$train_number} ), + format_train( $to, $map->{$to} ) ) + ); + } + } + if ( $train_number != $line ) { + push( + @output, + sprintf( "%s [shape=box];", + format_train( $train_number, $map->{$train_number} ) ) + ); + } } + + return @output; } +my @cycle_ids = keys %{ $map->{$line}{cycle} // {} }; + say "digraph Umlauf {"; -say join( "\n", uniq @output ); + +for my $cycle_id (@cycle_ids) { + say join( "\n", uniq build_cycle( $line, $cycle_id ) ); +} + +printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) ); + say "}" diff --git a/schema.yml b/schema.yml index 44869d1..60f5b15 100644 --- a/schema.yml +++ b/schema.yml @@ -128,15 +128,17 @@ components: description: rated maximum speed in km/h cycle: type: object - properties: - from: - type: array - items: - type: string - to: - type: array - items: - type: string + additionalProperties: + type: object + properties: + from: + type: array + items: + type: string + to: + type: array + items: + type: string hasWagon: type: object additionalProperties: -- cgit v1.2.3 From aaee1881495d0e5bae0e4027b48d2d62d6d5c0f1 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 11 Jan 2021 21:23:31 +0100 Subject: do not silently ignore duplicate train entries (wip) --- bin/db-zugbildung-to-json | 397 +++++++++++++++++++++++++--------------------- bin/umlauf-to-dot | 60 ++++--- schema.yml | 4 +- 3 files changed, 255 insertions(+), 206 deletions(-) diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index b1f804d..4721d31 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -247,7 +247,6 @@ my @lines = split( qr{\n}, $wr_txt ); my ( $type, $number ); my %map; -my %wagon_map; my $state = "intro"; @@ -294,9 +293,24 @@ for my $line (@lines) { $state = "train_no"; #say "$type $number"; - $map{$number} = { - rawType => $type, - }; + + if ( exists $map{$number} ) { + + # train numbers are not unique (they may have several wagon + # orders with non-intersecting date ranges) + my @dup = @{ $map{$number}{dup} // [] }; + delete $map{$number}{dup}; + push( @dup, $map{$number} ); + $map{$number} = { + rawType => $type, + dup => [@dup], + }; + } + else { + $map{$number} = { + rawType => $type, + }; + } if ($name) { $map{$number}{name} = $name; @@ -442,7 +456,7 @@ for my $line (@lines) { my $wagon_number = $+{number}; my $rest = $+{rest}; - push( @{ $wagon_map{$number} }, [ $wagon_type, $wagon_number ] ); + push( @{ $map{$number}{raw_wagons} }, [ $wagon_type, $wagon_number ] ); if ( $rest and $rest =~ m{\S} ) { push( @{ $map{$number}{wagonorder_notes} }, $line ); @@ -463,237 +477,254 @@ for my $line (@lines) { } } -for my $train ( values %map ) { - if ( not $train->{route} ) { - next; - } +for my $train_no ( keys %map ) { + my @entries = @{ $map{$train_no}{dup} // [] }; + delete $map{$train_no}{dup}; + push( @entries, $map{$train_no} ); + $map{$train_no} = [@entries]; +} + +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( not $train->{route} ) { + next; + } - my $route = $train->{route}; - my @parts = split( qr{ - }, $route ); + my $route = $train->{route}; + my @parts = split( qr{ - }, $route ); - if ( @parts < 2 ) { - $train->{route} = { raw => $route }; - next; - } - my ( $pre_start, $start, @middle, $end, $post_end ); + if ( @parts < 2 ) { + $train->{route} = { raw => $route }; + next; + } + my ( $pre_start, $start, @middle, $end, $post_end ); - if ( $parts[0] =~ m{ ^ [(] }x ) { - $pre_start = shift @parts; - $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( $parts[0] =~ m{ ^ [(] }x ) { + $pre_start = shift @parts; + $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - if ( $parts[-1] =~ m{ ^ [(] }x ) { - $post_end = pop @parts; - $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( $parts[-1] =~ m{ ^ [(] }x ) { + $post_end = pop @parts; + $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - $start = shift @parts; - $end = pop @parts; + $start = shift @parts; + $end = pop @parts; - $train->{route} = { - preStart => $pre_start, - start => $start, - middle => scalar @parts ? [@parts] : undef, - end => $end, - postEnd => $post_end, - }; + $train->{route} = { + preStart => $pre_start, + start => $start, + middle => scalar @parts ? [@parts] : undef, + end => $end, + postEnd => $post_end, + }; - for my $k ( keys %{ $train->{route} } ) { - if ( not defined $train->{route}{$k} ) { + for my $k ( keys %{ $train->{route} } ) { + if ( not defined $train->{route}{$k} ) { - # avoid null values (leave out the property instead) - delete $train->{route}{$k}; + # avoid null values (leave out the property instead) + delete $train->{route}{$k}; + } } } } -for my $train ( values %map ) { - if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { - next; - } - my $first_line = $train->{wagonorder_notes}[0]; - my $from_offset = 0; - my $to_offset = 0; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { + next; + } + my $first_line = $train->{wagonorder_notes}[0]; + my $from_offset = 0; + my $to_offset = 0; - # International trains (e.g. EC 8) are super special and may have another - # identifier after the "cycle to" train number. - if ( $first_line - =~ m{ ^ (? .*? [^0-9] ) (? \d+ ) (? [A-Z ]+ ) (? \d+ ) (?: \s+ \d+ )? $ }x - ) - { - $from_offset = length( $+{lead} ); - $to_offset = $from_offset + length( $+{from} ) + length( $+{middle} ); - } - if ( not $from_offset ) { - next; - } - my $cycle_id = "unknown"; - for my $i_line ( @{ $train->{wagonorder_notes} } ) { + # International trains (e.g. EC 8) are super special and may have another + # identifier after the "cycle to" train number. + if ( $first_line + =~ m{ ^ (? .*? [^0-9] ) (? \d+ ) (? [A-Z ]+ ) (? \d+ ) (?: \s+ \d+ )? $ }x + ) + { + $from_offset = length( $+{lead} ); + $to_offset + = $from_offset + length( $+{from} ) + length( $+{middle} ); + } + if ( not $from_offset ) { + next; + } + my $cycle_id = "unknown"; + for my $i_line ( @{ $train->{wagonorder_notes} } ) { # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes. # We don't want that. - my $line = substr( $i_line, 0 ); + my $line = substr( $i_line, 0 ); - # Fx abc appears to identify cycle groups. - # It also confuses the cycle detection code for cycles which do not have - # a "from" entry. - if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { - $cycle_id = $1; - } - if ( length($line) <= $from_offset ) { - next; - } - my $umlauf = substr( $line, $from_offset ); - if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); - } - if ( length($line) > $to_offset ) { - $umlauf = substr( $line, $to_offset ); + # Fx abc appears to identify cycle groups. + # It also confuses the cycle detection code for cycles which do not have + # a "from" entry. + if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { + $cycle_id = $1; + } + if ( length($line) <= $from_offset ) { + next; + } + my $umlauf = substr( $line, $from_offset ); if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); + push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); + } + if ( length($line) > $to_offset ) { + $umlauf = substr( $line, $to_offset ); + if ( $umlauf =~ m{ ^ (\d+) }x ) { + push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); + } } } - } - for my $cycle ( values %{ $train->{cycle} // {} } ) { - @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; - @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; + for my $cycle ( values %{ $train->{cycle} // {} } ) { + @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; + @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; + } } } -for my $train ( values %map ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { - if ( not $train->{details} ) { - next; - } + if ( not $train->{details} ) { + next; + } - my @details = @{ $train->{details} // [] }; - my %common_setup; - my @setups; + my @details = @{ $train->{details} // [] }; + my %common_setup; + my @setups; - for my $line (@details) { - my %setup; - if ( $line =~ m{ Tfz1:(\d+) }x ) { - $setup{series} = $1; - } - if ( $line =~ m{ Tfz2:(\d+) }x ) { - $setup{series2} = $1; - } - if ( $line =~ m{ Hg(\d+) }x ) { - $setup{vmax} = 0 + $1; - } - if ( $line =~ m{ BrH(\d+) }x ) { - $setup{brakingPercentage} = 0 + $1; - } - if ( $line =~ m{ (\d+)m }x ) { - $setup{length} = 0 + $1; - } - if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { - my $station = $1; - $station =~ s{\s+$}{}; - my $special = ( $station =~ s{^[+]}{} ); - if ( $ds100_to_name{$station} ) { - $station = $ds100_to_name{$station}; + for my $line (@details) { + my %setup; + if ( $line =~ m{ Tfz1:(\d+) }x ) { + $setup{series} = $1; + } + if ( $line =~ m{ Tfz2:(\d+) }x ) { + $setup{series2} = $1; + } + if ( $line =~ m{ Hg(\d+) }x ) { + $setup{vmax} = 0 + $1; } - if ($special) { - $station = "+${station}"; + if ( $line =~ m{ BrH(\d+) }x ) { + $setup{brakingPercentage} = 0 + $1; } - $setup{station} = $station; + if ( $line =~ m{ (\d+)m }x ) { + $setup{length} = 0 + $1; + } + if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { + my $station = $1; + $station =~ s{\s+$}{}; + my $special = ( $station =~ s{^[+]}{} ); + if ( $ds100_to_name{$station} ) { + $station = $ds100_to_name{$station}; + } + if ($special) { + $station = "+${station}"; + } + $setup{station} = $station; + } + push( @setups, {%setup} ); } - push( @setups, {%setup} ); - } - for my $key (qw(brakingPercentage length series series2 vmax)) { - if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) == 1 ) { - $common_setup{$key} = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + for my $key (qw(brakingPercentage length series series2 vmax)) { + if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) + == 1 ) + { + $common_setup{$key} + = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + } } - } - delete $train->{details}; - if (%common_setup) { - $train->{commonAttr} = {%common_setup}; + delete $train->{details}; + if (%common_setup) { + $train->{commonAttr} = {%common_setup}; + } + $train->{attrVariants} = [@setups]; } - $train->{attrVariants} = [@setups]; } -for my $train_number ( keys %map ) { - my $wagon_numbers_ok = 1; - my $wagon_types_ok = 1; - my $wagon_list_ok = 1; - my %bincount; - my %type_by_wagon_number; - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { - my ( $wagon_type, $wagon_number ) = @{$wagon}; - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - - # Locomotives do not have a wagon number. That's OK. - next; - } - elsif ( not $wagon_number ) { - $wagon_numbers_ok = 0; - } - else { - $bincount{$wagon_number}++; - push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); - } - } - if ($wagon_numbers_ok) { - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + my $wagon_numbers_ok = 1; + my $wagon_types_ok = 1; + my $wagon_list_ok = 1; + my %bincount; + my %type_by_wagon_number; + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { my ( $wagon_type, $wagon_number ) = @{$wagon}; + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - my $json_wagon = { type => $wagon_type }; - if ($wagon_number) { - $json_wagon->{number} = 0 + $wagon_number; + # Locomotives do not have a wagon number. That's OK. + next; + } + elsif ( not $wagon_number ) { + $wagon_numbers_ok = 0; } + else { + $bincount{$wagon_number}++; + push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); + } + } + if ($wagon_numbers_ok) { + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { + my ( $wagon_type, $wagon_number ) = @{$wagon}; - push( @{ $map{$train_number}{wagons} }, $json_wagon ); + my $json_wagon = { type => $wagon_type }; + if ($wagon_number) { + $json_wagon->{number} = 0 + $wagon_number; + } - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - next; - } + push( @{ $train->{wagons} }, $json_wagon ); + + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { + next; + } - if ( $bincount{$wagon_number} > 1 ) { - $wagon_list_ok = 0; - if ( $type_by_wagon_number{$wagon_number}[0] ne - $type_by_wagon_number{$wagon_number}[1] ) - { - $wagon_types_ok = 0; + if ( $bincount{$wagon_number} > 1 ) { + $wagon_list_ok = 0; + if ( $type_by_wagon_number{$wagon_number}[0] ne + $type_by_wagon_number{$wagon_number}[1] ) + { + $wagon_types_ok = 0; + } } } } - } - if ( not $wagon_types_ok ) { + if ( not $wagon_types_ok ) { - # train type may differ depending on date of week or similar. We don't - # parse/handle that yet. - $map{$train_number}{type} = $map{$train_number}{rawType}; - delete $map{$train_number}{shortType}; - delete $map{$train_number}{wagons}; - delete $map{$train_number}{hasWagon}; - } - elsif ( not $wagon_list_ok ) { + # train type may differ depending on date of week or similar. We don't + # parse/handle that yet. + $train->{type} = $train->{rawType}; + delete $train->{shortType}; + delete $train->{wagons}; + delete $train->{hasWagon}; + } + elsif ( not $wagon_list_ok ) { - # train type appears to be correct, but the list contains duplicates - # (e.g. due to unhandled schedule variants) - delete $map{$train_number}{wagons}; + # train type appears to be correct, but the list contains duplicates + # (e.g. due to unhandled schedule variants) + delete $train->{wagons}; + } } } # Cleanup -for my $train ( values %map ) { - delete $train->{wagonorder_notes}; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + delete $train->{raw_wagons}; + delete $train->{wagonorder_notes}; + } } -# broken umlauf (indentation changes on page break) -delete $map{104}{cycle}; -delete $map{1588}{cycle}; -delete $map{1700}{cycle}; -delete $map{77689}{cycle}; -delete $map{78112}{cycle}; - -# indentation change between consecutive lines. wat. -delete $map{939}{cycle}; -delete $map{2310}{cycle}; +for my $bork (qw(104 1588 1700 77689 78112 939 2310)) { + for my $train ( @{ $map{$bork} } ) { + delete $train->{cycle}; + } +} # use canonical output (i.e., sort hash keys) to allow for easy diffing. say JSON->new->utf8->canonical->encode( diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot index 1ac2c2e..860c962 100755 --- a/bin/umlauf-to-dot +++ b/bin/umlauf-to-dot @@ -67,9 +67,11 @@ sub build_cycle { my @candidates; - if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { - push( @candidates, @{ $c->{from} // [] } ); - push( @candidates, @{ $c->{to} // [] } ); + for my $train ( @{ $map->{$train_number} } ) { + if ( my $c = $train->{cycle}{$cycle_id} ) { + push( @candidates, @{ $c->{from} // [] } ); + push( @candidates, @{ $c->{to} // [] } ); + } } @candidates = uniq @candidates; @@ -87,29 +89,39 @@ sub build_cycle { my @output; for my $train_number (@train_numbers) { - if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { - for my $from ( @{ $c->{from} // [] } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $from, $map->{$from} ), - format_train( $train_number, $map->{$train_number} ) ) - ); - } - for my $to ( @{ $c->{to} // [] } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $train_number, $map->{$train_number} ), - format_train( $to, $map->{$to} ) ) - ); + for my $train ( @{ $map->{$train_number} } ) { + if ( my $c = $train->{cycle}{$cycle_id} ) { + for my $from ( @{ $c->{from} // [] } ) { + push( + @output, + sprintf( + "%s -> %s;", + format_train( $from, $map->{$from}[0] ), + format_train( + $train_number, $map->{$train_number}[0] + ) + ) + ); + } + for my $to ( @{ $c->{to} // [] } ) { + push( + @output, + sprintf( + "%s -> %s;", + format_train( + $train_number, $map->{$train_number}[0] + ), + format_train( $to, $map->{$to}[0] ) + ) + ); + } } } if ( $train_number != $line ) { push( @output, sprintf( "%s [shape=box];", - format_train( $train_number, $map->{$train_number} ) ) + format_train( $train_number, $map->{$train_number}[0] ) ) ); } } @@ -117,7 +129,11 @@ sub build_cycle { return @output; } -my @cycle_ids = keys %{ $map->{$line}{cycle} // {} }; +my @cycle_ids; + +for my $train ( @{ $map->{$line} } ) { + push( @cycle_ids, keys %{ $train->{cycle} // {} } ); +} say "digraph Umlauf {"; @@ -125,6 +141,6 @@ for my $cycle_id (@cycle_ids) { say join( "\n", uniq build_cycle( $line, $cycle_id ) ); } -printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) ); +printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) ); say "}" diff --git a/schema.yml b/schema.yml index 60f5b15..0cbc85d 100644 --- a/schema.yml +++ b/schema.yml @@ -40,7 +40,9 @@ components: type: object description: dict mapping train numbers to train objects additionalProperties: - $ref: '#/components/schemas/train' + type: array + items: + $ref: '#/components/schemas/train' train: type: object properties: -- cgit v1.2.3 From 985001e8b4fff4b2c7dcf3ebd8d26a178e1cf561 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 11 Jan 2021 22:09:32 +0100 Subject: filter train duplicates by valid date range --- bin/db-zugbildung-to-json | 74 +++++++++++++++++++++++++++++++++++++++++++---- bin/umlauf-to-dot | 60 +++++++++++++++----------------------- schema.yml | 7 ++++- 3 files changed, 99 insertions(+), 42 deletions(-) diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index 4721d31..6159a0e 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -8,6 +8,8 @@ use warnings; use 5.020; use utf8; +use DateTime; +use DateTime::Format::Strptime; use Encode qw(decode); use File::Slurp qw(write_file); use IPC::Run3; @@ -25,6 +27,32 @@ sub show_usage { exit $exit_code; } +my $strp = DateTime::Format::Strptime->new( + pattern => '%F', + time_zone => 'Europe/Berlin' +); +my $now = DateTime->now( time_zone => 'Europe/Berlin' ); + +sub range_is_today { + my ($range) = @_; + + if ( $range =~ m{^(.*)/(.*)$} ) { + my $dt1 = $strp->parse_datetime($1); + my $dt2 = $strp->parse_datetime($2); + if ( $dt1 and $dt2 and $dt1->epoch < $now->epoch < $dt2->epoch ) { + return 1; + } + } + else { + if ( my $dt = $strp->parse_datetime($range) ) { + if ( $dt->ymd eq $now->ymd ) { + return 1; + } + } + } + return; +} + my @weekdays = (qw(Mo Di Mi Do Fr Sa So)); my @months = (qw(0 I II III IV V VI VII VIII IX X XI XII)); my %weekday = map { ( $weekdays[$_] => $_ ) } ( 0 .. $#weekdays ); @@ -193,7 +221,7 @@ sub parse_condition { my %ret; if ( $line - =~ m{ ^ \s* (? .*? ) \s - \s (? [^,]+ ) , \s* (? (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (? .* ) $ }x + =~ m{ ^ \s* (? .*? ) \s - \s (? [^,]+ ) , \s* (? N \s* )? (? (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (? .* ) $ }x ) { $ret{from} = $+{from}; @@ -726,12 +754,48 @@ for my $bork (qw(104 1588 1700 77689 78112 939 2310)) { } } +my %smap; +for my $train_no ( keys %map ) { + if ( @{ $map{$train_no} } == 1 ) { + $smap{$train_no} = $map{$train_no}[0]; + } + else { + my $latest_valid; + my $valid_count = 0; + for my $train ( @{ $map{$train_no} } ) { + my $is_valid = 0; + for my $schedule ( @{ $train->{schedules} // [] } ) { + for my $valid ( @{ $schedule->{valid} // [] } ) { + if ( range_is_today($valid) ) { + $is_valid = 1; + } + } + + # invalid may override valid for certain days + for my $invalid ( @{ $schedule->{invalid} // [] } ) { + if ( range_is_today($invalid) ) { + $is_valid = 0; + } + } + } + if ($is_valid) { + $latest_valid = $train; + $valid_count++; + } + } + if ( $valid_count == 1 ) { + $smap{$train_no} = $latest_valid; + } + } +} + # use canonical output (i.e., sort hash keys) to allow for easy diffing. say JSON->new->utf8->canonical->encode( { - deprecated => \0, - source => $wr_name, - train => {%map}, - valid => $valid, + deprecated => \0, + source => $wr_name, + train => {%smap}, + train_variants => {%map}, + valid => $valid, } ); diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot index 860c962..47a766c 100755 --- a/bin/umlauf-to-dot +++ b/bin/umlauf-to-dot @@ -67,11 +67,10 @@ sub build_cycle { my @candidates; - for my $train ( @{ $map->{$train_number} } ) { - if ( my $c = $train->{cycle}{$cycle_id} ) { - push( @candidates, @{ $c->{from} // [] } ); - push( @candidates, @{ $c->{to} // [] } ); - } + my $train = $map->{$train_number}; + if ( my $c = $train->{cycle}{$cycle_id} ) { + push( @candidates, @{ $c->{from} // [] } ); + push( @candidates, @{ $c->{to} // [] } ); } @candidates = uniq @candidates; @@ -89,39 +88,30 @@ sub build_cycle { my @output; for my $train_number (@train_numbers) { - for my $train ( @{ $map->{$train_number} } ) { - if ( my $c = $train->{cycle}{$cycle_id} ) { - for my $from ( @{ $c->{from} // [] } ) { - push( - @output, - sprintf( - "%s -> %s;", - format_train( $from, $map->{$from}[0] ), - format_train( - $train_number, $map->{$train_number}[0] - ) - ) - ); - } - for my $to ( @{ $c->{to} // [] } ) { - push( - @output, - sprintf( - "%s -> %s;", - format_train( - $train_number, $map->{$train_number}[0] - ), - format_train( $to, $map->{$to}[0] ) - ) - ); - } + my $train = $map->{$train_number}; + if ( my $c = $train->{cycle}{$cycle_id} ) { + for my $from ( @{ $c->{from} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $from, $map->{$from} ), + format_train( $train_number, $map->{$train_number} ) ) + ); + } + for my $to ( @{ $c->{to} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $train_number, $map->{$train_number} ), + format_train( $to, $map->{$to} ) ) + ); } } if ( $train_number != $line ) { push( @output, sprintf( "%s [shape=box];", - format_train( $train_number, $map->{$train_number}[0] ) ) + format_train( $train_number, $map->{$train_number} ) ) ); } } @@ -131,9 +121,7 @@ sub build_cycle { my @cycle_ids; -for my $train ( @{ $map->{$line} } ) { - push( @cycle_ids, keys %{ $train->{cycle} // {} } ); -} +push( @cycle_ids, keys %{ $map->{$line}{cycle} // {} } ); say "digraph Umlauf {"; @@ -141,6 +129,6 @@ for my $cycle_id (@cycle_ids) { say join( "\n", uniq build_cycle( $line, $cycle_id ) ); } -printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) ); +printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) ); say "}" diff --git a/schema.yml b/schema.yml index 0cbc85d..b7542f9 100644 --- a/schema.yml +++ b/schema.yml @@ -38,7 +38,12 @@ components: description: ISO 8601 interval describing when this train composition dataset is valid train: type: object - description: dict mapping train numbers to train objects + description: dict mapping train numbers to probably valid train objects + additionalProperties: + $ref: '#/components/schemas/train' + train_variants: + type: object + description: dict mapping train numbers to list of possible train objects additionalProperties: type: array items: -- cgit v1.2.3