diff options
author | Daniel Friesel <derf@finalrewind.org> | 2021-01-12 17:13:41 +0100 |
---|---|---|
committer | Daniel Friesel <derf@finalrewind.org> | 2021-01-12 17:13:41 +0100 |
commit | 5490500350ebc9f4fd362767ac5eed011a6531f7 (patch) | |
tree | cc7b63ed786989638a966e2674f8b3bc70919def | |
parent | de4660a30682a19f697d9bbcd4be1875abe625aa (diff) | |
parent | 985001e8b4fff4b2c7dcf3ebd8d26a178e1cf561 (diff) |
Merge branch 'main' of git.finalrewind.org:var/git_root/db-zugbildung-to-json
-rwxr-xr-x | bin/db-zugbildung-to-json | 465 | ||||
-rwxr-xr-x | bin/umlauf-to-dot | 78 | ||||
-rw-r--r-- | schema.yml | 29 |
3 files changed, 352 insertions, 220 deletions
diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json index 0607222..fc01f34 100755 --- a/bin/db-zugbildung-to-json +++ b/bin/db-zugbildung-to-json @@ -8,6 +8,8 @@ use warnings; use 5.020; use utf8; +use DateTime; +use DateTime::Format::Strptime; use Encode qw(decode); use File::Slurp qw(write_file); use IPC::Run3; @@ -25,6 +27,32 @@ sub show_usage { exit $exit_code; } +my $strp = DateTime::Format::Strptime->new( + pattern => '%F', + time_zone => 'Europe/Berlin' +); +my $now = DateTime->now( time_zone => 'Europe/Berlin' ); + +sub range_is_today { + my ($range) = @_; + + if ( $range =~ m{^(.*)/(.*)$} ) { + my $dt1 = $strp->parse_datetime($1); + my $dt2 = $strp->parse_datetime($2); + if ( $dt1 and $dt2 and $dt1->epoch < $now->epoch < $dt2->epoch ) { + return 1; + } + } + else { + if ( my $dt = $strp->parse_datetime($range) ) { + if ( $dt->ymd eq $now->ymd ) { + return 1; + } + } + } + return; +} + my @weekdays = (qw(Mo Di Mi Do Fr Sa So)); my @months = (qw(0 I II III IV V VI VII VIII IX X XI XII)); my %weekday = map { ( $weekdays[$_] => $_ ) } ( 0 .. $#weekdays ); @@ -193,7 +221,7 @@ sub parse_condition { my %ret; if ( $line - =~ m{ ^ \s* (?<from> .*? ) \s - \s (?<to> [^,]+ ) , \s* (?<weekdays> (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (?<dates> .* ) $ }x + =~ m{ ^ \s* (?<from> .*? ) \s - \s (?<to> [^,]+ ) , \s* (?<unknown> N \s* )? (?<weekdays> (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (?<dates> .* ) $ }x ) { $ret{from} = $+{from}; @@ -247,7 +275,6 @@ my @lines = split( qr{\n}, $wr_txt ); my ( $type, $number ); my %map; -my %wagon_map; my $state = "intro"; @@ -294,9 +321,24 @@ for my $line (@lines) { $state = "train_no"; #say "$type $number"; - $map{$number} = { - rawType => $type, - }; + + if ( exists $map{$number} ) { + + # train numbers are not unique (they may have several wagon + # orders with non-intersecting date ranges) + my @dup = @{ $map{$number}{dup} // [] }; + delete $map{$number}{dup}; + push( @dup, $map{$number} ); + $map{$number} = { + rawType => $type, + dup => [@dup], + }; + } + else { + $map{$number} = { + rawType => $type, + }; + } if ($name) { $map{$number}{name} = $name; @@ -442,7 +484,7 @@ for my $line (@lines) { my $wagon_number = $+{number}; my $rest = $+{rest}; - push( @{ $wagon_map{$number} }, [ $wagon_type, $wagon_number ] ); + push( @{ $map{$number}{raw_wagons} }, [ $wagon_type, $wagon_number ] ); if ( $rest and $rest =~ m{\S} ) { push( @{ $map{$number}{wagonorder_notes} }, $line ); @@ -463,240 +505,297 @@ for my $line (@lines) { } } -for my $train ( values %map ) { - if ( not $train->{route} ) { - next; - } +for my $train_no ( keys %map ) { + my @entries = @{ $map{$train_no}{dup} // [] }; + delete $map{$train_no}{dup}; + push( @entries, $map{$train_no} ); + $map{$train_no} = [@entries]; +} - my $route = $train->{route}; - my @parts = split( qr{ - }, $route ); +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( not $train->{route} ) { + next; + } - if ( @parts < 2 ) { - $train->{route} = { raw => $route }; - next; - } - my ( $pre_start, $start, @middle, $end, $post_end ); + my $route = $train->{route}; + my @parts = split( qr{ - }, $route ); - if ( $parts[0] =~ m{ ^ [(] }x ) { - $pre_start = shift @parts; - $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( @parts < 2 ) { + $train->{route} = { raw => $route }; + next; + } + my ( $pre_start, $start, @middle, $end, $post_end ); - if ( $parts[-1] =~ m{ ^ [(] }x ) { - $post_end = pop @parts; - $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; - } + if ( $parts[0] =~ m{ ^ [(] }x ) { + $pre_start = shift @parts; + $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - $start = shift @parts; - $end = pop @parts; + if ( $parts[-1] =~ m{ ^ [(] }x ) { + $post_end = pop @parts; + $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x; + } - $train->{route} = { - preStart => $pre_start, - start => $start, - middle => scalar @parts ? [@parts] : undef, - end => $end, - postEnd => $post_end, - }; + $start = shift @parts; + $end = pop @parts; - for my $k ( keys %{ $train->{route} } ) { - if ( not defined $train->{route}{$k} ) { + $train->{route} = { + preStart => $pre_start, + start => $start, + middle => scalar @parts ? [@parts] : undef, + end => $end, + postEnd => $post_end, + }; - # avoid null values (leave out the property instead) - delete $train->{route}{$k}; + for my $k ( keys %{ $train->{route} } ) { + if ( not defined $train->{route}{$k} ) { + + # avoid null values (leave out the property instead) + delete $train->{route}{$k}; + } } } } -for my $train ( values %map ) { - if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { - next; - } - my $first_line = $train->{wagonorder_notes}[0]; - my $from_offset = 0; - my $to_offset = 0; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + if ( @{ $train->{wagonorder_notes} // [] } == 0 ) { + next; + } + my $first_line = $train->{wagonorder_notes}[0]; + my $from_offset = 0; + my $to_offset = 0; - # International trains (e.g. EC 8) are super special and may have another - # identifier after the "cycle to" train number. - if ( $first_line - =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x - ) - { - $from_offset = length( $+{lead} ); - $to_offset = $from_offset + length( $+{from} ) + length( $+{middle} ); - } - if ( not $from_offset ) { - next; - } - for my $i_line ( @{ $train->{wagonorder_notes} } ) { + # International trains (e.g. EC 8) are super special and may have another + # identifier after the "cycle to" train number. + if ( $first_line + =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x + ) + { + $from_offset = length( $+{lead} ); + $to_offset + = $from_offset + length( $+{from} ) + length( $+{middle} ); + } + if ( not $from_offset ) { + next; + } + my $cycle_id = "unknown"; + for my $i_line ( @{ $train->{wagonorder_notes} } ) { # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes. # We don't want that. - my $line = substr( $i_line, 0 ); + my $line = substr( $i_line, 0 ); - # Some cycles do not have a "from" entry, but they do have an odd - # F-identifier, which confuses the cycle detection code. remove it. - $line =~ s{F[0-9] [0-9]{3}}{.. ...}; - if ( length($line) <= $from_offset ) { - next; - } - my $umlauf = substr( $line, $from_offset ); - if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{from} }, $1 ); - } - if ( length($line) > $to_offset ) { - $umlauf = substr( $line, $to_offset ); + # Fx abc appears to identify cycle groups. + # It also confuses the cycle detection code for cycles which do not have + # a "from" entry. + if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) { + $cycle_id = $1; + } + if ( length($line) <= $from_offset ) { + next; + } + my $umlauf = substr( $line, $from_offset ); if ( $umlauf =~ m{ ^ (\d+) }x ) { - push( @{ $train->{cycle}{to} }, $1 ); + push( @{ $train->{cycle}{$cycle_id}{from} }, $1 ); + } + if ( length($line) > $to_offset ) { + $umlauf = substr( $line, $to_offset ); + if ( $umlauf =~ m{ ^ (\d+) }x ) { + push( @{ $train->{cycle}{$cycle_id}{to} }, $1 ); + } } } - } - if ( exists $train->{cycle} ) { - @{ $train->{cycle}{from} } = uniq @{ $train->{cycle}{from} // [] }; - @{ $train->{cycle}{to} } = uniq @{ $train->{cycle}{to} // [] }; + for my $cycle ( values %{ $train->{cycle} // {} } ) { + @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] }; + @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] }; + } } } -for my $train ( values %map ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { - if ( not $train->{details} ) { - next; - } + if ( not $train->{details} ) { + next; + } - my @details = @{ $train->{details} // [] }; - my %common_setup; - my @setups; + my @details = @{ $train->{details} // [] }; + my %common_setup; + my @setups; - for my $line (@details) { - my %setup; - if ( $line =~ m{ Tfz1:(\d+) }x ) { - $setup{series} = $1; - } - if ( $line =~ m{ Tfz2:(\d+) }x ) { - $setup{series2} = $1; - } - if ( $line =~ m{ Hg(\d+) }x ) { - $setup{vmax} = 0 + $1; - } - if ( $line =~ m{ BrH(\d+) }x ) { - $setup{brakingPercentage} = 0 + $1; - } - if ( $line =~ m{ (\d+)m }x ) { - $setup{length} = 0 + $1; - } - if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { - my $station = $1; - $station =~ s{\s+$}{}; - my $special = ( $station =~ s{^[+]}{} ); - if ( $ds100_to_name{$station} ) { - $station = $ds100_to_name{$station}; + for my $line (@details) { + my %setup; + if ( $line =~ m{ Tfz1:(\d+) }x ) { + $setup{series} = $1; + } + if ( $line =~ m{ Tfz2:(\d+) }x ) { + $setup{series2} = $1; + } + if ( $line =~ m{ Hg(\d+) }x ) { + $setup{vmax} = 0 + $1; } - if ($special) { - $station = "+${station}"; + if ( $line =~ m{ BrH(\d+) }x ) { + $setup{brakingPercentage} = 0 + $1; } - $setup{station} = $station; + if ( $line =~ m{ (\d+)m }x ) { + $setup{length} = 0 + $1; + } + if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) { + my $station = $1; + $station =~ s{\s+$}{}; + my $special = ( $station =~ s{^[+]}{} ); + if ( $ds100_to_name{$station} ) { + $station = $ds100_to_name{$station}; + } + if ($special) { + $station = "+${station}"; + } + $setup{station} = $station; + } + push( @setups, {%setup} ); } - push( @setups, {%setup} ); - } - for my $key (qw(brakingPercentage length series series2 vmax)) { - if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) == 1 ) { - $common_setup{$key} = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + for my $key (qw(brakingPercentage length series series2 vmax)) { + if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) + == 1 ) + { + $common_setup{$key} + = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0]; + } } - } - delete $train->{details}; - if (%common_setup) { - $train->{commonAttr} = {%common_setup}; + delete $train->{details}; + if (%common_setup) { + $train->{commonAttr} = {%common_setup}; + } + $train->{attrVariants} = [@setups]; } - $train->{attrVariants} = [@setups]; } -for my $train_number ( keys %map ) { - my $wagon_numbers_ok = 1; - my $wagon_types_ok = 1; - my $wagon_list_ok = 1; - my %bincount; - my %type_by_wagon_number; - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { - my ( $wagon_type, $wagon_number ) = @{$wagon}; - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - - # Locomotives do not have a wagon number. That's OK. - next; - } - elsif ( not $wagon_number ) { - $wagon_numbers_ok = 0; - } - else { - $bincount{$wagon_number}++; - push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); - } - } - if ($wagon_numbers_ok) { - for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + my $wagon_numbers_ok = 1; + my $wagon_types_ok = 1; + my $wagon_list_ok = 1; + my %bincount; + my %type_by_wagon_number; + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { my ( $wagon_type, $wagon_number ) = @{$wagon}; + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - my $json_wagon = { type => $wagon_type }; - if ($wagon_number) { - $json_wagon->{number} = 0 + $wagon_number; + # Locomotives do not have a wagon number. That's OK. + next; } + elsif ( not $wagon_number ) { + $wagon_numbers_ok = 0; + } + else { + $bincount{$wagon_number}++; + push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); + } + } + if ($wagon_numbers_ok) { + for my $wagon ( @{ $train->{raw_wagons} // [] } ) { + my ( $wagon_type, $wagon_number ) = @{$wagon}; - push( @{ $map{$train_number}{wagons} }, $json_wagon ); + my $json_wagon = { type => $wagon_type }; + if ($wagon_number) { + $json_wagon->{number} = 0 + $wagon_number; + } - if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { - next; - } + push( @{ $train->{wagons} }, $json_wagon ); - if ( $bincount{$wagon_number} > 1 ) { - $wagon_list_ok = 0; - if ( $type_by_wagon_number{$wagon_number}[0] ne - $type_by_wagon_number{$wagon_number}[1] ) - { - $wagon_types_ok = 0; + if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) { + next; + } + + if ( $bincount{$wagon_number} > 1 ) { + $wagon_list_ok = 0; + if ( $type_by_wagon_number{$wagon_number}[0] ne + $type_by_wagon_number{$wagon_number}[1] ) + { + $wagon_types_ok = 0; + } } } } - } - if ( not $wagon_types_ok ) { + if ( not $wagon_types_ok ) { - # train type may differ depending on date of week or similar. We don't - # parse/handle that yet. - $map{$train_number}{type} = $map{$train_number}{rawType}; - delete $map{$train_number}{shortType}; - delete $map{$train_number}{wagons}; - delete $map{$train_number}{hasWagon}; - } - elsif ( not $wagon_list_ok ) { + # train type may differ depending on date of week or similar. We don't + # parse/handle that yet. + $train->{type} = $train->{rawType}; + delete $train->{shortType}; + delete $train->{wagons}; + delete $train->{hasWagon}; + } + elsif ( not $wagon_list_ok ) { - # train type appears to be correct, but the list contains duplicates - # (e.g. due to unhandled schedule variants) - delete $map{$train_number}{wagons}; + # train type appears to be correct, but the list contains duplicates + # (e.g. due to unhandled schedule variants) + delete $train->{wagons}; + } } } # Cleanup -for my $train ( values %map ) { - delete $train->{wagonorder_notes}; +for my $trains ( values %map ) { + for my $train ( @{$trains} ) { + delete $train->{raw_wagons}; + delete $train->{wagonorder_notes}; + } } -# broken umlauf (indentation changes on page break) -delete $map{104}{cycle}; -delete $map{1588}{cycle}; -delete $map{1700}{cycle}; -delete $map{77689}{cycle}; -delete $map{78112}{cycle}; +for my $bork (qw(104 1588 1700 77689 78112 939 2310)) { + for my $train ( @{ $map{$bork} } ) { + delete $train->{cycle}; + } +} -# indentation change between consecutive lines. wat. -delete $map{939}{cycle}; -delete $map{2310}{cycle}; +my %smap; +for my $train_no ( keys %map ) { + if ( @{ $map{$train_no} } == 1 ) { + $smap{$train_no} = $map{$train_no}[0]; + } + else { + my $latest_valid; + my $valid_count = 0; + for my $train ( @{ $map{$train_no} } ) { + my $is_valid = 0; + for my $schedule ( @{ $train->{schedules} // [] } ) { + for my $valid ( @{ $schedule->{valid} // [] } ) { + if ( range_is_today($valid) ) { + $is_valid = 1; + } + } + + # invalid may override valid for certain days + for my $invalid ( @{ $schedule->{invalid} // [] } ) { + if ( range_is_today($invalid) ) { + $is_valid = 0; + } + } + } + if ($is_valid) { + $latest_valid = $train; + $valid_count++; + } + } + if ( $valid_count == 1 ) { + $smap{$train_no} = $latest_valid; + } + } +} # use canonical output (i.e., sort hash keys) to allow for easy diffing. say JSON->new->utf8->canonical->encode( { - deprecated => \0, - source => $wr_name, - train => {%map}, - valid => $valid, + deprecated => \0, + source => $wr_name, + train => {%smap}, + train_variants => {%map}, + valid => $valid, } ); diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot index 69472d8..47a766c 100755 --- a/bin/umlauf-to-dot +++ b/bin/umlauf-to-dot @@ -52,9 +52,10 @@ my $json = JSON->new->utf8->decode($file_content); my $map = $json->{train}; -my @train_numbers; -if ($line) { +sub build_cycle { + my ( $line, $cycle_id ) = @_; my @queue = ( [ $line, 0 ] ); + my @train_numbers; while (@queue) { my ( $train_number, $distance ) = @{ pop @queue }; @@ -65,8 +66,12 @@ if ($line) { } my @candidates; - push( @candidates, @{ $map->{$train_number}{cycle}{from} // [] } ); - push( @candidates, @{ $map->{$train_number}{cycle}{to} // [] } ); + + my $train = $map->{$train_number}; + if ( my $c = $train->{cycle}{$cycle_id} ) { + push( @candidates, @{ $c->{from} // [] } ); + push( @candidates, @{ $c->{to} // [] } ); + } @candidates = uniq @candidates; @candidates @@ -79,32 +84,51 @@ if ($line) { push( @queue, map { [ $_, $distance + 1 ] } @candidates ); } -} -else { - @train_numbers = keys %{$map}; -} -my @output; - -for my $train_number (@train_numbers) { - for my $from ( @{ $map->{$train_number}{cycle}{from} } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $from, $map->{$from} ), - format_train( $train_number, $map->{$train_number} ) ) - ); - } - for my $to ( @{ $map->{$train_number}{cycle}{to} } ) { - push( - @output, - sprintf( "%s -> %s;", - format_train( $train_number, $map->{$train_number} ), - format_train( $to, $map->{$to} ) ) - ); + my @output; + + for my $train_number (@train_numbers) { + my $train = $map->{$train_number}; + if ( my $c = $train->{cycle}{$cycle_id} ) { + for my $from ( @{ $c->{from} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $from, $map->{$from} ), + format_train( $train_number, $map->{$train_number} ) ) + ); + } + for my $to ( @{ $c->{to} // [] } ) { + push( + @output, + sprintf( "%s -> %s;", + format_train( $train_number, $map->{$train_number} ), + format_train( $to, $map->{$to} ) ) + ); + } + } + if ( $train_number != $line ) { + push( + @output, + sprintf( "%s [shape=box];", + format_train( $train_number, $map->{$train_number} ) ) + ); + } } + + return @output; } +my @cycle_ids; + +push( @cycle_ids, keys %{ $map->{$line}{cycle} // {} } ); + say "digraph Umlauf {"; -say join( "\n", uniq @output ); + +for my $cycle_id (@cycle_ids) { + say join( "\n", uniq build_cycle( $line, $cycle_id ) ); +} + +printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) ); + say "}" @@ -38,9 +38,16 @@ components: description: ISO 8601 interval describing when this train composition dataset is valid train: type: object - description: dict mapping train numbers to train objects + description: dict mapping train numbers to probably valid train objects additionalProperties: $ref: '#/components/schemas/train' + train_variants: + type: object + description: dict mapping train numbers to list of possible train objects + additionalProperties: + type: array + items: + $ref: '#/components/schemas/train' train: type: object properties: @@ -128,15 +135,17 @@ components: description: rated maximum speed in km/h cycle: type: object - properties: - from: - type: array - items: - type: string - to: - type: array - items: - type: string + additionalProperties: + type: object + properties: + from: + type: array + items: + type: string + to: + type: array + items: + type: string hasWagon: type: object additionalProperties: |