summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2021-01-11 21:23:31 +0100
committerDaniel Friesel <derf@finalrewind.org>2021-01-11 21:23:31 +0100
commitaaee1881495d0e5bae0e4027b48d2d62d6d5c0f1 (patch)
tree700a5857594eb8cd6018185bd2eab879934f0567
parentc6270546445c1c3954636a1c92d882d1b4b379bd (diff)
do not silently ignore duplicate train entries (wip)
-rwxr-xr-xbin/db-zugbildung-to-json397
-rwxr-xr-xbin/umlauf-to-dot60
-rw-r--r--schema.yml4
3 files changed, 255 insertions, 206 deletions
diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json
index b1f804d..4721d31 100755
--- a/bin/db-zugbildung-to-json
+++ b/bin/db-zugbildung-to-json
@@ -247,7 +247,6 @@ my @lines = split( qr{\n}, $wr_txt );
my ( $type, $number );
my %map;
-my %wagon_map;
my $state = "intro";
@@ -294,9 +293,24 @@ for my $line (@lines) {
$state = "train_no";
#say "$type $number";
- $map{$number} = {
- rawType => $type,
- };
+
+ if ( exists $map{$number} ) {
+
+ # train numbers are not unique (they may have several wagon
+ # orders with non-intersecting date ranges)
+ my @dup = @{ $map{$number}{dup} // [] };
+ delete $map{$number}{dup};
+ push( @dup, $map{$number} );
+ $map{$number} = {
+ rawType => $type,
+ dup => [@dup],
+ };
+ }
+ else {
+ $map{$number} = {
+ rawType => $type,
+ };
+ }
if ($name) {
$map{$number}{name} = $name;
@@ -442,7 +456,7 @@ for my $line (@lines) {
my $wagon_number = $+{number};
my $rest = $+{rest};
- push( @{ $wagon_map{$number} }, [ $wagon_type, $wagon_number ] );
+ push( @{ $map{$number}{raw_wagons} }, [ $wagon_type, $wagon_number ] );
if ( $rest and $rest =~ m{\S} ) {
push( @{ $map{$number}{wagonorder_notes} }, $line );
@@ -463,237 +477,254 @@ for my $line (@lines) {
}
}
-for my $train ( values %map ) {
- if ( not $train->{route} ) {
- next;
- }
+for my $train_no ( keys %map ) {
+ my @entries = @{ $map{$train_no}{dup} // [] };
+ delete $map{$train_no}{dup};
+ push( @entries, $map{$train_no} );
+ $map{$train_no} = [@entries];
+}
+
+for my $trains ( values %map ) {
+ for my $train ( @{$trains} ) {
+ if ( not $train->{route} ) {
+ next;
+ }
- my $route = $train->{route};
- my @parts = split( qr{ - }, $route );
+ my $route = $train->{route};
+ my @parts = split( qr{ - }, $route );
- if ( @parts < 2 ) {
- $train->{route} = { raw => $route };
- next;
- }
- my ( $pre_start, $start, @middle, $end, $post_end );
+ if ( @parts < 2 ) {
+ $train->{route} = { raw => $route };
+ next;
+ }
+ my ( $pre_start, $start, @middle, $end, $post_end );
- if ( $parts[0] =~ m{ ^ [(] }x ) {
- $pre_start = shift @parts;
- $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x;
- }
+ if ( $parts[0] =~ m{ ^ [(] }x ) {
+ $pre_start = shift @parts;
+ $pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x;
+ }
- if ( $parts[-1] =~ m{ ^ [(] }x ) {
- $post_end = pop @parts;
- $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x;
- }
+ if ( $parts[-1] =~ m{ ^ [(] }x ) {
+ $post_end = pop @parts;
+ $post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x;
+ }
- $start = shift @parts;
- $end = pop @parts;
+ $start = shift @parts;
+ $end = pop @parts;
- $train->{route} = {
- preStart => $pre_start,
- start => $start,
- middle => scalar @parts ? [@parts] : undef,
- end => $end,
- postEnd => $post_end,
- };
+ $train->{route} = {
+ preStart => $pre_start,
+ start => $start,
+ middle => scalar @parts ? [@parts] : undef,
+ end => $end,
+ postEnd => $post_end,
+ };
- for my $k ( keys %{ $train->{route} } ) {
- if ( not defined $train->{route}{$k} ) {
+ for my $k ( keys %{ $train->{route} } ) {
+ if ( not defined $train->{route}{$k} ) {
- # avoid null values (leave out the property instead)
- delete $train->{route}{$k};
+ # avoid null values (leave out the property instead)
+ delete $train->{route}{$k};
+ }
}
}
}
-for my $train ( values %map ) {
- if ( @{ $train->{wagonorder_notes} // [] } == 0 ) {
- next;
- }
- my $first_line = $train->{wagonorder_notes}[0];
- my $from_offset = 0;
- my $to_offset = 0;
+for my $trains ( values %map ) {
+ for my $train ( @{$trains} ) {
+ if ( @{ $train->{wagonorder_notes} // [] } == 0 ) {
+ next;
+ }
+ my $first_line = $train->{wagonorder_notes}[0];
+ my $from_offset = 0;
+ my $to_offset = 0;
- # International trains (e.g. EC 8) are super special and may have another
- # identifier after the "cycle to" train number.
- if ( $first_line
- =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x
- )
- {
- $from_offset = length( $+{lead} );
- $to_offset = $from_offset + length( $+{from} ) + length( $+{middle} );
- }
- if ( not $from_offset ) {
- next;
- }
- my $cycle_id = "unknown";
- for my $i_line ( @{ $train->{wagonorder_notes} } ) {
+ # International trains (e.g. EC 8) are super special and may have another
+ # identifier after the "cycle to" train number.
+ if ( $first_line
+ =~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x
+ )
+ {
+ $from_offset = length( $+{lead} );
+ $to_offset
+ = $from_offset + length( $+{from} ) + length( $+{middle} );
+ }
+ if ( not $from_offset ) {
+ next;
+ }
+ my $cycle_id = "unknown";
+ for my $i_line ( @{ $train->{wagonorder_notes} } ) {
# $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes.
# We don't want that.
- my $line = substr( $i_line, 0 );
+ my $line = substr( $i_line, 0 );
- # Fx abc appears to identify cycle groups.
- # It also confuses the cycle detection code for cycles which do not have
- # a "from" entry.
- if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) {
- $cycle_id = $1;
- }
- if ( length($line) <= $from_offset ) {
- next;
- }
- my $umlauf = substr( $line, $from_offset );
- if ( $umlauf =~ m{ ^ (\d+) }x ) {
- push( @{ $train->{cycle}{$cycle_id}{from} }, $1 );
- }
- if ( length($line) > $to_offset ) {
- $umlauf = substr( $line, $to_offset );
+ # Fx abc appears to identify cycle groups.
+ # It also confuses the cycle detection code for cycles which do not have
+ # a "from" entry.
+ if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) {
+ $cycle_id = $1;
+ }
+ if ( length($line) <= $from_offset ) {
+ next;
+ }
+ my $umlauf = substr( $line, $from_offset );
if ( $umlauf =~ m{ ^ (\d+) }x ) {
- push( @{ $train->{cycle}{$cycle_id}{to} }, $1 );
+ push( @{ $train->{cycle}{$cycle_id}{from} }, $1 );
+ }
+ if ( length($line) > $to_offset ) {
+ $umlauf = substr( $line, $to_offset );
+ if ( $umlauf =~ m{ ^ (\d+) }x ) {
+ push( @{ $train->{cycle}{$cycle_id}{to} }, $1 );
+ }
}
}
- }
- for my $cycle ( values %{ $train->{cycle} // {} } ) {
- @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] };
- @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] };
+ for my $cycle ( values %{ $train->{cycle} // {} } ) {
+ @{ $cycle->{from} } = uniq @{ $cycle->{from} // [] };
+ @{ $cycle->{to} } = uniq @{ $cycle->{to} // [] };
+ }
}
}
-for my $train ( values %map ) {
+for my $trains ( values %map ) {
+ for my $train ( @{$trains} ) {
- if ( not $train->{details} ) {
- next;
- }
+ if ( not $train->{details} ) {
+ next;
+ }
- my @details = @{ $train->{details} // [] };
- my %common_setup;
- my @setups;
+ my @details = @{ $train->{details} // [] };
+ my %common_setup;
+ my @setups;
- for my $line (@details) {
- my %setup;
- if ( $line =~ m{ Tfz1:(\d+) }x ) {
- $setup{series} = $1;
- }
- if ( $line =~ m{ Tfz2:(\d+) }x ) {
- $setup{series2} = $1;
- }
- if ( $line =~ m{ Hg(\d+) }x ) {
- $setup{vmax} = 0 + $1;
- }
- if ( $line =~ m{ BrH(\d+) }x ) {
- $setup{brakingPercentage} = 0 + $1;
- }
- if ( $line =~ m{ (\d+)m }x ) {
- $setup{length} = 0 + $1;
- }
- if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) {
- my $station = $1;
- $station =~ s{\s+$}{};
- my $special = ( $station =~ s{^[+]}{} );
- if ( $ds100_to_name{$station} ) {
- $station = $ds100_to_name{$station};
+ for my $line (@details) {
+ my %setup;
+ if ( $line =~ m{ Tfz1:(\d+) }x ) {
+ $setup{series} = $1;
+ }
+ if ( $line =~ m{ Tfz2:(\d+) }x ) {
+ $setup{series2} = $1;
+ }
+ if ( $line =~ m{ Hg(\d+) }x ) {
+ $setup{vmax} = 0 + $1;
}
- if ($special) {
- $station = "+${station}";
+ if ( $line =~ m{ BrH(\d+) }x ) {
+ $setup{brakingPercentage} = 0 + $1;
}
- $setup{station} = $station;
+ if ( $line =~ m{ (\d+)m }x ) {
+ $setup{length} = 0 + $1;
+ }
+ if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) {
+ my $station = $1;
+ $station =~ s{\s+$}{};
+ my $special = ( $station =~ s{^[+]}{} );
+ if ( $ds100_to_name{$station} ) {
+ $station = $ds100_to_name{$station};
+ }
+ if ($special) {
+ $station = "+${station}";
+ }
+ $setup{station} = $station;
+ }
+ push( @setups, {%setup} );
}
- push( @setups, {%setup} );
- }
- for my $key (qw(brakingPercentage length series series2 vmax)) {
- if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) == 1 ) {
- $common_setup{$key} = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0];
+ for my $key (qw(brakingPercentage length series series2 vmax)) {
+ if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups )
+ == 1 )
+ {
+ $common_setup{$key}
+ = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0];
+ }
}
- }
- delete $train->{details};
- if (%common_setup) {
- $train->{commonAttr} = {%common_setup};
+ delete $train->{details};
+ if (%common_setup) {
+ $train->{commonAttr} = {%common_setup};
+ }
+ $train->{attrVariants} = [@setups];
}
- $train->{attrVariants} = [@setups];
}
-for my $train_number ( keys %map ) {
- my $wagon_numbers_ok = 1;
- my $wagon_types_ok = 1;
- my $wagon_list_ok = 1;
- my %bincount;
- my %type_by_wagon_number;
- for my $wagon ( @{ $wagon_map{$train_number} // [] } ) {
- my ( $wagon_type, $wagon_number ) = @{$wagon};
- if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
-
- # Locomotives do not have a wagon number. That's OK.
- next;
- }
- elsif ( not $wagon_number ) {
- $wagon_numbers_ok = 0;
- }
- else {
- $bincount{$wagon_number}++;
- push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type );
- }
- }
- if ($wagon_numbers_ok) {
- for my $wagon ( @{ $wagon_map{$train_number} // [] } ) {
+for my $trains ( values %map ) {
+ for my $train ( @{$trains} ) {
+ my $wagon_numbers_ok = 1;
+ my $wagon_types_ok = 1;
+ my $wagon_list_ok = 1;
+ my %bincount;
+ my %type_by_wagon_number;
+ for my $wagon ( @{ $train->{raw_wagons} // [] } ) {
my ( $wagon_type, $wagon_number ) = @{$wagon};
+ if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
- my $json_wagon = { type => $wagon_type };
- if ($wagon_number) {
- $json_wagon->{number} = 0 + $wagon_number;
+ # Locomotives do not have a wagon number. That's OK.
+ next;
+ }
+ elsif ( not $wagon_number ) {
+ $wagon_numbers_ok = 0;
}
+ else {
+ $bincount{$wagon_number}++;
+ push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type );
+ }
+ }
+ if ($wagon_numbers_ok) {
+ for my $wagon ( @{ $train->{raw_wagons} // [] } ) {
+ my ( $wagon_type, $wagon_number ) = @{$wagon};
- push( @{ $map{$train_number}{wagons} }, $json_wagon );
+ my $json_wagon = { type => $wagon_type };
+ if ($wagon_number) {
+ $json_wagon->{number} = 0 + $wagon_number;
+ }
- if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
- next;
- }
+ push( @{ $train->{wagons} }, $json_wagon );
+
+ if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
+ next;
+ }
- if ( $bincount{$wagon_number} > 1 ) {
- $wagon_list_ok = 0;
- if ( $type_by_wagon_number{$wagon_number}[0] ne
- $type_by_wagon_number{$wagon_number}[1] )
- {
- $wagon_types_ok = 0;
+ if ( $bincount{$wagon_number} > 1 ) {
+ $wagon_list_ok = 0;
+ if ( $type_by_wagon_number{$wagon_number}[0] ne
+ $type_by_wagon_number{$wagon_number}[1] )
+ {
+ $wagon_types_ok = 0;
+ }
}
}
}
- }
- if ( not $wagon_types_ok ) {
+ if ( not $wagon_types_ok ) {
- # train type may differ depending on date of week or similar. We don't
- # parse/handle that yet.
- $map{$train_number}{type} = $map{$train_number}{rawType};
- delete $map{$train_number}{shortType};
- delete $map{$train_number}{wagons};
- delete $map{$train_number}{hasWagon};
- }
- elsif ( not $wagon_list_ok ) {
+ # train type may differ depending on date of week or similar. We don't
+ # parse/handle that yet.
+ $train->{type} = $train->{rawType};
+ delete $train->{shortType};
+ delete $train->{wagons};
+ delete $train->{hasWagon};
+ }
+ elsif ( not $wagon_list_ok ) {
- # train type appears to be correct, but the list contains duplicates
- # (e.g. due to unhandled schedule variants)
- delete $map{$train_number}{wagons};
+ # train type appears to be correct, but the list contains duplicates
+ # (e.g. due to unhandled schedule variants)
+ delete $train->{wagons};
+ }
}
}
# Cleanup
-for my $train ( values %map ) {
- delete $train->{wagonorder_notes};
+for my $trains ( values %map ) {
+ for my $train ( @{$trains} ) {
+ delete $train->{raw_wagons};
+ delete $train->{wagonorder_notes};
+ }
}
-# broken umlauf (indentation changes on page break)
-delete $map{104}{cycle};
-delete $map{1588}{cycle};
-delete $map{1700}{cycle};
-delete $map{77689}{cycle};
-delete $map{78112}{cycle};
-
-# indentation change between consecutive lines. wat.
-delete $map{939}{cycle};
-delete $map{2310}{cycle};
+for my $bork (qw(104 1588 1700 77689 78112 939 2310)) {
+ for my $train ( @{ $map{$bork} } ) {
+ delete $train->{cycle};
+ }
+}
# use canonical output (i.e., sort hash keys) to allow for easy diffing.
say JSON->new->utf8->canonical->encode(
diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot
index 1ac2c2e..860c962 100755
--- a/bin/umlauf-to-dot
+++ b/bin/umlauf-to-dot
@@ -67,9 +67,11 @@ sub build_cycle {
my @candidates;
- if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
- push( @candidates, @{ $c->{from} // [] } );
- push( @candidates, @{ $c->{to} // [] } );
+ for my $train ( @{ $map->{$train_number} } ) {
+ if ( my $c = $train->{cycle}{$cycle_id} ) {
+ push( @candidates, @{ $c->{from} // [] } );
+ push( @candidates, @{ $c->{to} // [] } );
+ }
}
@candidates = uniq @candidates;
@@ -87,29 +89,39 @@ sub build_cycle {
my @output;
for my $train_number (@train_numbers) {
- if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
- for my $from ( @{ $c->{from} // [] } ) {
- push(
- @output,
- sprintf( "%s -> %s;",
- format_train( $from, $map->{$from} ),
- format_train( $train_number, $map->{$train_number} ) )
- );
- }
- for my $to ( @{ $c->{to} // [] } ) {
- push(
- @output,
- sprintf( "%s -> %s;",
- format_train( $train_number, $map->{$train_number} ),
- format_train( $to, $map->{$to} ) )
- );
+ for my $train ( @{ $map->{$train_number} } ) {
+ if ( my $c = $train->{cycle}{$cycle_id} ) {
+ for my $from ( @{ $c->{from} // [] } ) {
+ push(
+ @output,
+ sprintf(
+ "%s -> %s;",
+ format_train( $from, $map->{$from}[0] ),
+ format_train(
+ $train_number, $map->{$train_number}[0]
+ )
+ )
+ );
+ }
+ for my $to ( @{ $c->{to} // [] } ) {
+ push(
+ @output,
+ sprintf(
+ "%s -> %s;",
+ format_train(
+ $train_number, $map->{$train_number}[0]
+ ),
+ format_train( $to, $map->{$to}[0] )
+ )
+ );
+ }
}
}
if ( $train_number != $line ) {
push(
@output,
sprintf( "%s [shape=box];",
- format_train( $train_number, $map->{$train_number} ) )
+ format_train( $train_number, $map->{$train_number}[0] ) )
);
}
}
@@ -117,7 +129,11 @@ sub build_cycle {
return @output;
}
-my @cycle_ids = keys %{ $map->{$line}{cycle} // {} };
+my @cycle_ids;
+
+for my $train ( @{ $map->{$line} } ) {
+ push( @cycle_ids, keys %{ $train->{cycle} // {} } );
+}
say "digraph Umlauf {";
@@ -125,6 +141,6 @@ for my $cycle_id (@cycle_ids) {
say join( "\n", uniq build_cycle( $line, $cycle_id ) );
}
-printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) );
+printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) );
say "}"
diff --git a/schema.yml b/schema.yml
index 60f5b15..0cbc85d 100644
--- a/schema.yml
+++ b/schema.yml
@@ -40,7 +40,9 @@ components:
type: object
description: dict mapping train numbers to train objects
additionalProperties:
- $ref: '#/components/schemas/train'
+ type: array
+ items:
+ $ref: '#/components/schemas/train'
train:
type: object
properties: