diff options
author | Daniel Friesel <derf@finalrewind.org> | 2021-01-11 21:23:31 +0100 |
---|---|---|
committer | Daniel Friesel <derf@finalrewind.org> | 2021-01-11 21:23:31 +0100 |
commit | aaee1881495d0e5bae0e4027b48d2d62d6d5c0f1 (patch) | |
tree | 700a5857594eb8cd6018185bd2eab879934f0567 | |
parent | c6270546445c1c3954636a1c92d882d1b4b379bd (diff) |
do not silently ignore duplicate train entries (wip)
-rwxr-xr-x | bin/db-zugbildung-to-json | 397 | ||||
-rwxr-xr-x | bin/umlauf-to-dot | 60 | ||||
-rw-r--r-- | schema.yml | 4 |
3 files changed, 255 insertions, 206 deletions
diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index b1f804d..4721d31 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -247,7 +247,6 @@ my @lines = split( qr{\n}, $wr_txt ); my ( $type, $number ); my %map; -my %wagon_map; my $state = "intro"; @@ -294,9 +293,24 @@ for my $line (@lines) { $state = "train_no"; #say "$type $number"; - $map{$number} = { - rawType => $type, - }; + + if ( exists $map{$number} ) { + + # train numbers are not unique (they may have several wagon + # orders with non-intersecting date ranges) + my @dup = @{ $map{$number}{dup} // [] }; + delete $map{$number}{dup}; + push( @dup, $map{$number} ); + $map{$number} = { + rawType => $type, + dup => [@dup], + }; + } + else { + $map{$number} = { + rawType => $type, + }; + } if ($name) { $map{$number}{name} = $name; @@ -442,7 +456,7 @@ for my $line (@lines) { my $wagon_number = $+{number}; my $rest = $+{rest}; - push( @{ $wagon_map{$number} }, [ $wagon_type, $wagon_number ] ); + push( @{ $map{$number}{raw_wagons} }, [ $wagon_type, $wagon_number ] ); if ( $rest and $rest =~ m{\S} ) { push( @{ $map{$number}{wagonorder_notes} }, $line ); @@ -463,237 +477,254 @@ for my $line (@lines) { } } -for my $train ( values %map ) { - if ( not $train->{route} ) { - next; - } +for my $train_no ( keys %map ) { + my @entries = @{ $map{$train_no}{dup} // [] }; + delete $map{$train_no}{dup}; + push( @entries, $map{$train_no} ); + $map{$train_no} = [@entries]; +} + +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( not $train->{route} ) { + next; + } - my $route = $train->{route}; - my @parts = split( qr{ - }, $route ); + my $route = $train->{route}; + my @parts = split( qr{ - }, $route ); - if ( @parts < 2 ) { - $train->{route} = { raw => $route }; - next; - } - my ( $pre_start, $start, @middle, $end, $post_end ); + if ( @parts < 2 ) { + $train->{route} = { raw => $route }; + next; + } + my ( $pre_start, $start, @middle, $end, $post_end ); - if ( $parts[0] =~ m{ ^ [(] }x ) { - $pre_start = shift @parts; - $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( $parts[0] =~ m{ ^ [(] }x ) { + $pre_start = shift @parts; + $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - if ( $parts[-1] =~ m{ ^ [(] }x ) { - $post_end = pop @parts; - $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( $parts[-1] =~ m{ ^ [(] }x ) { + $post_end = pop @parts; + $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - $start = shift @parts; - $end = pop @parts; + $start = shift @parts; + $end = pop @parts; - $train->{route} = { - preStart => $pre_start, - start => $start, - middle => scalar @parts ? [@parts] : undef, - end => $end, - postEnd => $post_end, - }; + $train->{route} = { + preStart => $pre_start, + start => $start, + middle => scalar @parts ? [@parts] : undef, + end => $end, + postEnd => $post_end, + }; - for my $k ( keys %{ $train->{route} } ) { - if ( not defined $train->{route}{$k} ) { + for my $k ( keys %{ $train->{route} } ) { + if ( not defined $train->{route}{$k} ) { - # avoid null values (leave out the property instead) - delete $train->{route}{$k}; + # avoid null values (leave out the property instead) + delete $train->{route}{$k}; + } } } } -for my $train ( values %map ) { - if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { - next; - } - my $first_line = $train->{wagonorder_notes}[0]; - my $from_offset = 0; - my $to_offset = 0; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { + next; + } + my $first_line = $train->{wagonorder_notes}[0]; + my $from_offset = 0; + my $to_offset = 0; - # International trains (e.g. EC 8) are super special and may have another - # identifier after the "cycle to" train number. - if ( $first_line - =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x - ) - { - $from_offset = length( $+{lead} ); - $to_offset = $from_offset + length( $+{from} ) + length( $+{middle} ); - } - if ( not $from_offset ) { - next; - } - my $cycle_id = "unknown"; - for my $i_line ( @{ $train->{wagonorder_notes} } ) { + # International trains (e.g. EC 8) are super special and may have another + # identifier after the "cycle to" train number. + if ( $first_line + =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x + ) + { + $from_offset = length( $+{lead} ); + $to_offset + = $from_offset + length( $+{from} ) + length( $+{middle} ); + } + if ( not $from_offset ) { + next; + } + my $cycle_id = "unknown"; + for my $i_line ( @{ $train->{wagonorder_notes} } ) { # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes. # We don't want that. - my $line = substr( $i_line, 0 ); + my $line = substr( $i_line, 0 ); - # Fx abc appears to identify cycle groups. - # It also confuses the cycle detection code for cycles which do not have - # a "from" entry. - if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { - $cycle_id = $1; - } - if ( length($line) <= $from_offset ) { - next; - } - my $umlauf = substr( $line, $from_offset ); - if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); - } - if ( length($line) > $to_offset ) { - $umlauf = substr( $line, $to_offset ); + # Fx abc appears to identify cycle groups. + # It also confuses the cycle detection code for cycles which do not have + # a "from" entry. + if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { + $cycle_id = $1; + } + if ( length($line) <= $from_offset ) { + next; + } + my $umlauf = substr( $line, $from_offset ); if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); + push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); + } + if ( length($line) > $to_offset ) { + $umlauf = substr( $line, $to_offset ); + if ( $umlauf =~ m{ ^ (\d+) }x ) { + push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); + } } } - } - for my $cycle ( values %{ $train->{cycle} // {} } ) { - @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; - @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; + for my $cycle ( values %{ $train->{cycle} // {} } ) { + @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; + @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; + } } } -for my $train ( values %map ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { - if ( not $train->{details} ) { - next; - } + if ( not $train->{details} ) { + next; + } - my @details = @{ $train->{details} // [] }; - my %common_setup; - my @setups; + my @details = @{ $train->{details} // [] }; + my %common_setup; + my @setups; - for my $line (@details) { - my %setup; - if ( $line =~ m{ Tfz1:(\d+) }x ) { - $setup{series} = $1; - } - if ( $line =~ m{ Tfz2:(\d+) }x ) { - $setup{series2} = $1; - } - if ( $line =~ m{ Hg(\d+) }x ) { - $setup{vmax} = 0 + $1; - } - if ( $line =~ m{ BrH(\d+) }x ) { - $setup{brakingPercentage} = 0 + $1; - } - if ( $line =~ m{ (\d+)m }x ) { - $setup{length} = 0 + $1; - } - if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { - my $station = $1; - $station =~ s{\s+$}{}; - my $special = ( $station =~ s{^[+]}{} ); - if ( $ds100_to_name{$station} ) { - $station = $ds100_to_name{$station}; + for my $line (@details) { + my %setup; + if ( $line =~ m{ Tfz1:(\d+) }x ) { + $setup{series} = $1; + } + if ( $line =~ m{ Tfz2:(\d+) }x ) { + $setup{series2} = $1; + } + if ( $line =~ m{ Hg(\d+) }x ) { + $setup{vmax} = 0 + $1; } - if ($special) { - $station = "+${station}"; + if ( $line =~ m{ BrH(\d+) }x ) { + $setup{brakingPercentage} = 0 + $1; } - $setup{station} = $station; + if ( $line =~ m{ (\d+)m }x ) { + $setup{length} = 0 + $1; + } + if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { + my $station = $1; + $station =~ s{\s+$}{}; + my $special = ( $station =~ s{^[+]}{} ); + if ( $ds100_to_name{$station} ) { + $station = $ds100_to_name{$station}; + } + if ($special) { + $station = "+${station}"; + } + $setup{station} = $station; + } + push( @setups, {%setup} ); } - push( @setups, {%setup} ); - } - for my $key (qw(brakingPercentage length series series2 vmax)) { - if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) == 1 ) { - $common_setup{$key} = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + for my $key (qw(brakingPercentage length series series2 vmax)) { + if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) + == 1 ) + { + $common_setup{$key} + = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + } } - } - delete $train->{details}; - if (%common_setup) { - $train->{commonAttr} = {%common_setup}; + delete $train->{details}; + if (%common_setup) { + $train->{commonAttr} = {%common_setup}; + } + $train->{attrVariants} = [@setups]; } - $train->{attrVariants} = [@setups]; } -for my $train_number ( keys %map ) { - my $wagon_numbers_ok = 1; - my $wagon_types_ok = 1; - my $wagon_list_ok = 1; - my %bincount; - my %type_by_wagon_number; - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { - my ( $wagon_type, $wagon_number ) = @{$wagon}; - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - - # Locomotives do not have a wagon number. That's OK. - next; - } - elsif ( not $wagon_number ) { - $wagon_numbers_ok = 0; - } - else { - $bincount{$wagon_number}++; - push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); - } - } - if ($wagon_numbers_ok) { - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + my $wagon_numbers_ok = 1; + my $wagon_types_ok = 1; + my $wagon_list_ok = 1; + my %bincount; + my %type_by_wagon_number; + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { my ( $wagon_type, $wagon_number ) = @{$wagon}; + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - my $json_wagon = { type => $wagon_type }; - if ($wagon_number) { - $json_wagon->{number} = 0 + $wagon_number; + # Locomotives do not have a wagon number. That's OK. + next; + } + elsif ( not $wagon_number ) { + $wagon_numbers_ok = 0; } + else { + $bincount{$wagon_number}++; + push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); + } + } + if ($wagon_numbers_ok) { + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { + my ( $wagon_type, $wagon_number ) = @{$wagon}; - push( @{ $map{$train_number}{wagons} }, $json_wagon ); + my $json_wagon = { type => $wagon_type }; + if ($wagon_number) { + $json_wagon->{number} = 0 + $wagon_number; + } - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - next; - } + push( @{ $train->{wagons} }, $json_wagon ); + + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { + next; + } - if ( $bincount{$wagon_number} > 1 ) { - $wagon_list_ok = 0; - if ( $type_by_wagon_number{$wagon_number}[0] ne - $type_by_wagon_number{$wagon_number}[1] ) - { - $wagon_types_ok = 0; + if ( $bincount{$wagon_number} > 1 ) { + $wagon_list_ok = 0; + if ( $type_by_wagon_number{$wagon_number}[0] ne + $type_by_wagon_number{$wagon_number}[1] ) + { + $wagon_types_ok = 0; + } } } } - } - if ( not $wagon_types_ok ) { + if ( not $wagon_types_ok ) { - # train type may differ depending on date of week or similar. We don't - # parse/handle that yet. - $map{$train_number}{type} = $map{$train_number}{rawType}; - delete $map{$train_number}{shortType}; - delete $map{$train_number}{wagons}; - delete $map{$train_number}{hasWagon}; - } - elsif ( not $wagon_list_ok ) { + # train type may differ depending on date of week or similar. We don't + # parse/handle that yet. + $train->{type} = $train->{rawType}; + delete $train->{shortType}; + delete $train->{wagons}; + delete $train->{hasWagon}; + } + elsif ( not $wagon_list_ok ) { - # train type appears to be correct, but the list contains duplicates - # (e.g. due to unhandled schedule variants) - delete $map{$train_number}{wagons}; + # train type appears to be correct, but the list contains duplicates + # (e.g. due to unhandled schedule variants) + delete $train->{wagons}; + } } } # Cleanup -for my $train ( values %map ) { - delete $train->{wagonorder_notes}; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + delete $train->{raw_wagons}; + delete $train->{wagonorder_notes}; + } } -# broken umlauf (indentation changes on page break) -delete $map{104}{cycle}; -delete $map{1588}{cycle}; -delete $map{1700}{cycle}; -delete $map{77689}{cycle}; -delete $map{78112}{cycle}; - -# indentation change between consecutive lines. wat. -delete $map{939}{cycle}; -delete $map{2310}{cycle}; +for my $bork (qw(104 1588 1700 77689 78112 939 2310)) { + for my $train ( @{ $map{$bork} } ) { + delete $train->{cycle}; + } +} # use canonical output (i.e., sort hash keys) to allow for easy diffing. say JSON->new->utf8->canonical->encode( diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot index 1ac2c2e..860c962 100755 --- a/bin/umlauf-to-dot +++ b/bin/umlauf-to-dot @@ -67,9 +67,11 @@ sub build_cycle { my @candidates; - if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { - push( @candidates, @{ $c->{from} // [] } ); - push( @candidates, @{ $c->{to} // [] } ); + for my $train ( @{ $map->{$train_number} } ) { + if ( my $c = $train->{cycle}{$cycle_id} ) { + push( @candidates, @{ $c->{from} // [] } ); + push( @candidates, @{ $c->{to} // [] } ); + } } @candidates = uniq @candidates; @@ -87,29 +89,39 @@ sub build_cycle { my @output; for my $train_number (@train_numbers) { - if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) { - for my $from ( @{ $c->{from} // [] } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $from, $map->{$from} ), - format_train( $train_number, $map->{$train_number} ) ) - ); - } - for my $to ( @{ $c->{to} // [] } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $train_number, $map->{$train_number} ), - format_train( $to, $map->{$to} ) ) - ); + for my $train ( @{ $map->{$train_number} } ) { + if ( my $c = $train->{cycle}{$cycle_id} ) { + for my $from ( @{ $c->{from} // [] } ) { + push( + @output, + sprintf( + "%s -> %s;", + format_train( $from, $map->{$from}[0] ), + format_train( + $train_number, $map->{$train_number}[0] + ) + ) + ); + } + for my $to ( @{ $c->{to} // [] } ) { + push( + @output, + sprintf( + "%s -> %s;", + format_train( + $train_number, $map->{$train_number}[0] + ), + format_train( $to, $map->{$to}[0] ) + ) + ); + } } } if ( $train_number != $line ) { push( @output, sprintf( "%s [shape=box];", - format_train( $train_number, $map->{$train_number} ) ) + format_train( $train_number, $map->{$train_number}[0] ) ) ); } } @@ -117,7 +129,11 @@ sub build_cycle { return @output; } -my @cycle_ids = keys %{ $map->{$line}{cycle} // {} }; +my @cycle_ids; + +for my $train ( @{ $map->{$line} } ) { + push( @cycle_ids, keys %{ $train->{cycle} // {} } ); +} say "digraph Umlauf {"; @@ -125,6 +141,6 @@ for my $cycle_id (@cycle_ids) { say join( "\n", uniq build_cycle( $line, $cycle_id ) ); } -printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) ); +printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) ); say "}" @@ -40,7 +40,9 @@ components: type: object description: dict mapping train numbers to train objects additionalProperties: - $ref: '#/components/schemas/train' + type: array + items: + $ref: '#/components/schemas/train' train: type: object properties: |