From c6270546445c1c3954636a1c92d882d1b4b379bd Mon Sep 17 00:00:00 2001
From: Daniel Friesel <derf@finalrewind.org>
Date: Mon, 11 Jan 2021 19:31:35 +0100
Subject: respect cycle IDs when parsing cycle refs and building graphs

---
 bin/db-zugbildung-to-json | 20 ++++++++-----
 bin/umlauf-to-dot         | 74 ++++++++++++++++++++++++++++++-----------------
 schema.yml                | 20 +++++++------
 3 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json
index d6bd897..b1f804d 100755
--- a/bin/db-zugbildung-to-json
+++ b/bin/db-zugbildung-to-json
@@ -527,32 +527,36 @@ for my $train ( values %map ) {
 	if ( not $from_offset ) {
 		next;
 	}
+	my $cycle_id = "unknown";
 	for my $i_line ( @{ $train->{wagonorder_notes} } ) {
 
        # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes.
        # We don't want that.
 		my $line = substr( $i_line, 0 );
 
-		# Some cycles do not have a "from" entry, but they do have an odd
-		# F-identifier, which confuses the cycle detection code. remove it.
-		$line =~ s{F[0-9] [0-9]{3}}{.. ...};
+		# Fx abc appears to identify cycle groups.
+		# It also confuses the cycle detection code for cycles which do not have
+		# a "from" entry.
+		if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) {
+			$cycle_id = $1;
+		}
 		if ( length($line) <= $from_offset ) {
 			next;
 		}
 		my $umlauf = substr( $line, $from_offset );
 		if ( $umlauf =~ m{ ^ (\d+) }x ) {
-			push( @{ $train->{cycle}{from} }, $1 );
+			push( @{ $train->{cycle}{$cycle_id}{from} }, $1 );
 		}
 		if ( length($line) > $to_offset ) {
 			$umlauf = substr( $line, $to_offset );
 			if ( $umlauf =~ m{ ^ (\d+) }x ) {
-				push( @{ $train->{cycle}{to} }, $1 );
+				push( @{ $train->{cycle}{$cycle_id}{to} }, $1 );
 			}
 		}
 	}
-	if ( exists $train->{cycle} ) {
-		@{ $train->{cycle}{from} } = uniq @{ $train->{cycle}{from} // [] };
-		@{ $train->{cycle}{to} }   = uniq @{ $train->{cycle}{to}   // [] };
+	for my $cycle ( values %{ $train->{cycle} // {} } ) {
+		@{ $cycle->{from} } = uniq @{ $cycle->{from} // [] };
+		@{ $cycle->{to} }   = uniq @{ $cycle->{to}   // [] };
 	}
 }
 
diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot
index 69472d8..1ac2c2e 100755
--- a/bin/umlauf-to-dot
+++ b/bin/umlauf-to-dot
@@ -52,9 +52,10 @@ my $json         = JSON->new->utf8->decode($file_content);
 
 my $map = $json->{train};
 
-my @train_numbers;
-if ($line) {
+sub build_cycle {
+	my ( $line, $cycle_id ) = @_;
 	my @queue = ( [ $line, 0 ] );
+	my @train_numbers;
 
 	while (@queue) {
 		my ( $train_number, $distance ) = @{ pop @queue };
@@ -65,8 +66,11 @@ if ($line) {
 		}
 
 		my @candidates;
-		push( @candidates, @{ $map->{$train_number}{cycle}{from} // [] } );
-		push( @candidates, @{ $map->{$train_number}{cycle}{to}   // [] } );
+
+		if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
+			push( @candidates, @{ $c->{from} // [] } );
+			push( @candidates, @{ $c->{to}   // [] } );
+		}
 
 		@candidates = uniq @candidates;
 		@candidates
@@ -79,32 +83,48 @@ if ($line) {
 
 		push( @queue, map { [ $_, $distance + 1 ] } @candidates );
 	}
-}
-else {
-	@train_numbers = keys %{$map};
-}
 
-my @output;
-
-for my $train_number (@train_numbers) {
-	for my $from ( @{ $map->{$train_number}{cycle}{from} } ) {
-		push(
-			@output,
-			sprintf( "%s -> %s;",
-				format_train( $from,         $map->{$from} ),
-				format_train( $train_number, $map->{$train_number} ) )
-		);
-	}
-	for my $to ( @{ $map->{$train_number}{cycle}{to} } ) {
-		push(
-			@output,
-			sprintf( "%s -> %s;",
-				format_train( $train_number, $map->{$train_number} ),
-				format_train( $to,           $map->{$to} ) )
-		);
+	my @output;
+
+	for my $train_number (@train_numbers) {
+		if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
+			for my $from ( @{ $c->{from} // [] } ) {
+				push(
+					@output,
+					sprintf( "%s -> %s;",
+						format_train( $from,         $map->{$from} ),
+						format_train( $train_number, $map->{$train_number} ) )
+				);
+			}
+			for my $to ( @{ $c->{to} // [] } ) {
+				push(
+					@output,
+					sprintf( "%s -> %s;",
+						format_train( $train_number, $map->{$train_number} ),
+						format_train( $to,           $map->{$to} ) )
+				);
+			}
+		}
+		if ( $train_number != $line ) {
+			push(
+				@output,
+				sprintf( "%s [shape=box];",
+					format_train( $train_number, $map->{$train_number} ) )
+			);
+		}
 	}
+
+	return @output;
 }
 
+my @cycle_ids = keys %{ $map->{$line}{cycle} // {} };
+
 say "digraph Umlauf {";
-say join( "\n", uniq @output );
+
+for my $cycle_id (@cycle_ids) {
+	say join( "\n", uniq build_cycle( $line, $cycle_id ) );
+}
+
+printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) );
+
 say "}"
diff --git a/schema.yml b/schema.yml
index 44869d1..60f5b15 100644
--- a/schema.yml
+++ b/schema.yml
@@ -128,15 +128,17 @@ components:
           description: rated maximum speed in km/h
     cycle:
       type: object
-      properties:
-        from:
-          type: array
-          items:
-            type: string
-        to:
-          type: array
-          items:
-            type: string
+      additionalProperties:
+        type: object
+        properties:
+          from:
+            type: array
+            items:
+              type: string
+          to:
+            type: array
+            items:
+              type: string
     hasWagon:
       type: object
       additionalProperties:
-- 
cgit v1.2.3


From aaee1881495d0e5bae0e4027b48d2d62d6d5c0f1 Mon Sep 17 00:00:00 2001
From: Daniel Friesel <derf@finalrewind.org>
Date: Mon, 11 Jan 2021 21:23:31 +0100
Subject: do not silently ignore duplicate train entries (wip)

---
 bin/db-zugbildung-to-json | 397 +++++++++++++++++++++++++---------------------
 bin/umlauf-to-dot         |  60 ++++---
 schema.yml                |   4 +-
 3 files changed, 255 insertions(+), 206 deletions(-)

diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json
index b1f804d..4721d31 100755
--- a/bin/db-zugbildung-to-json
+++ b/bin/db-zugbildung-to-json
@@ -247,7 +247,6 @@ my @lines = split( qr{\n}, $wr_txt );
 
 my ( $type, $number );
 my %map;
-my %wagon_map;
 
 my $state = "intro";
 
@@ -294,9 +293,24 @@ for my $line (@lines) {
 		$state = "train_no";
 
 		#say "$type $number";
-		$map{$number} = {
-			rawType => $type,
-		};
+
+		if ( exists $map{$number} ) {
+
+			# train numbers are not unique (they may have several wagon
+			# orders with non-intersecting date ranges)
+			my @dup = @{ $map{$number}{dup} // [] };
+			delete $map{$number}{dup};
+			push( @dup, $map{$number} );
+			$map{$number} = {
+				rawType => $type,
+				dup     => [@dup],
+			};
+		}
+		else {
+			$map{$number} = {
+				rawType => $type,
+			};
+		}
 
 		if ($name) {
 			$map{$number}{name} = $name;
@@ -442,7 +456,7 @@ for my $line (@lines) {
 		my $wagon_number = $+{number};
 		my $rest         = $+{rest};
 
-		push( @{ $wagon_map{$number} }, [ $wagon_type, $wagon_number ] );
+		push( @{ $map{$number}{raw_wagons} }, [ $wagon_type, $wagon_number ] );
 
 		if ( $rest and $rest =~ m{\S} ) {
 			push( @{ $map{$number}{wagonorder_notes} }, $line );
@@ -463,237 +477,254 @@ for my $line (@lines) {
 	}
 }
 
-for my $train ( values %map ) {
-	if ( not $train->{route} ) {
-		next;
-	}
+for my $train_no ( keys %map ) {
+	my @entries = @{ $map{$train_no}{dup} // [] };
+	delete $map{$train_no}{dup};
+	push( @entries, $map{$train_no} );
+	$map{$train_no} = [@entries];
+}
+
+for my $trains ( values %map ) {
+	for my $train ( @{$trains} ) {
+		if ( not $train->{route} ) {
+			next;
+		}
 
-	my $route = $train->{route};
-	my @parts = split( qr{ - }, $route );
+		my $route = $train->{route};
+		my @parts = split( qr{ - }, $route );
 
-	if ( @parts < 2 ) {
-		$train->{route} = { raw => $route };
-		next;
-	}
-	my ( $pre_start, $start, @middle, $end, $post_end );
+		if ( @parts < 2 ) {
+			$train->{route} = { raw => $route };
+			next;
+		}
+		my ( $pre_start, $start, @middle, $end, $post_end );
 
-	if ( $parts[0] =~ m{ ^ [(] }x ) {
-		$pre_start = shift @parts;
-		$pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x;
-	}
+		if ( $parts[0] =~ m{ ^ [(] }x ) {
+			$pre_start = shift @parts;
+			$pre_start =~ s{ ^ [(] (.*) [)] $ }{$1}x;
+		}
 
-	if ( $parts[-1] =~ m{ ^ [(] }x ) {
-		$post_end = pop @parts;
-		$post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x;
-	}
+		if ( $parts[-1] =~ m{ ^ [(] }x ) {
+			$post_end = pop @parts;
+			$post_end =~ s{ ^ [(] (.*) [)] $ }{$1}x;
+		}
 
-	$start = shift @parts;
-	$end   = pop @parts;
+		$start = shift @parts;
+		$end   = pop @parts;
 
-	$train->{route} = {
-		preStart => $pre_start,
-		start    => $start,
-		middle   => scalar @parts ? [@parts] : undef,
-		end      => $end,
-		postEnd  => $post_end,
-	};
+		$train->{route} = {
+			preStart => $pre_start,
+			start    => $start,
+			middle   => scalar @parts ? [@parts] : undef,
+			end      => $end,
+			postEnd  => $post_end,
+		};
 
-	for my $k ( keys %{ $train->{route} } ) {
-		if ( not defined $train->{route}{$k} ) {
+		for my $k ( keys %{ $train->{route} } ) {
+			if ( not defined $train->{route}{$k} ) {
 
-			# avoid null values (leave out the property instead)
-			delete $train->{route}{$k};
+				# avoid null values (leave out the property instead)
+				delete $train->{route}{$k};
+			}
 		}
 	}
 }
 
-for my $train ( values %map ) {
-	if ( @{ $train->{wagonorder_notes} // [] } == 0 ) {
-		next;
-	}
-	my $first_line  = $train->{wagonorder_notes}[0];
-	my $from_offset = 0;
-	my $to_offset   = 0;
+for my $trains ( values %map ) {
+	for my $train ( @{$trains} ) {
+		if ( @{ $train->{wagonorder_notes} // [] } == 0 ) {
+			next;
+		}
+		my $first_line  = $train->{wagonorder_notes}[0];
+		my $from_offset = 0;
+		my $to_offset   = 0;
 
-	# International trains (e.g. EC 8) are super special and may have another
-	# identifier after the "cycle to" train number.
-	if ( $first_line
-		=~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x
-	  )
-	{
-		$from_offset = length( $+{lead} );
-		$to_offset   = $from_offset + length( $+{from} ) + length( $+{middle} );
-	}
-	if ( not $from_offset ) {
-		next;
-	}
-	my $cycle_id = "unknown";
-	for my $i_line ( @{ $train->{wagonorder_notes} } ) {
+       # International trains (e.g. EC 8) are super special and may have another
+       # identifier after the "cycle to" train number.
+		if ( $first_line
+			=~ m{ ^ (?<lead> .*? [^0-9] ) (?<from> \d+ ) (?<middle> [A-Z ]+ ) (?<to> \d+ ) (?: \s+ \d+ )? $ }x
+		  )
+		{
+			$from_offset = length( $+{lead} );
+			$to_offset
+			  = $from_offset + length( $+{from} ) + length( $+{middle} );
+		}
+		if ( not $from_offset ) {
+			next;
+		}
+		my $cycle_id = "unknown";
+		for my $i_line ( @{ $train->{wagonorder_notes} } ) {
 
        # $i_line is an lvalue, so changes in $i_line end up in wagonorder_notes.
        # We don't want that.
-		my $line = substr( $i_line, 0 );
+			my $line = substr( $i_line, 0 );
 
-		# Fx abc appears to identify cycle groups.
-		# It also confuses the cycle detection code for cycles which do not have
-		# a "from" entry.
-		if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) {
-			$cycle_id = $1;
-		}
-		if ( length($line) <= $from_offset ) {
-			next;
-		}
-		my $umlauf = substr( $line, $from_offset );
-		if ( $umlauf =~ m{ ^ (\d+) }x ) {
-			push( @{ $train->{cycle}{$cycle_id}{from} }, $1 );
-		}
-		if ( length($line) > $to_offset ) {
-			$umlauf = substr( $line, $to_offset );
+        # Fx abc appears to identify cycle groups.
+        # It also confuses the cycle detection code for cycles which do not have
+        # a "from" entry.
+			if ( $line =~ s{(F[0-9] [0-9]{3})}{.. ...} ) {
+				$cycle_id = $1;
+			}
+			if ( length($line) <= $from_offset ) {
+				next;
+			}
+			my $umlauf = substr( $line, $from_offset );
 			if ( $umlauf =~ m{ ^ (\d+) }x ) {
-				push( @{ $train->{cycle}{$cycle_id}{to} }, $1 );
+				push( @{ $train->{cycle}{$cycle_id}{from} }, $1 );
+			}
+			if ( length($line) > $to_offset ) {
+				$umlauf = substr( $line, $to_offset );
+				if ( $umlauf =~ m{ ^ (\d+) }x ) {
+					push( @{ $train->{cycle}{$cycle_id}{to} }, $1 );
+				}
 			}
 		}
-	}
-	for my $cycle ( values %{ $train->{cycle} // {} } ) {
-		@{ $cycle->{from} } = uniq @{ $cycle->{from} // [] };
-		@{ $cycle->{to} }   = uniq @{ $cycle->{to}   // [] };
+		for my $cycle ( values %{ $train->{cycle} // {} } ) {
+			@{ $cycle->{from} } = uniq @{ $cycle->{from} // [] };
+			@{ $cycle->{to} }   = uniq @{ $cycle->{to}   // [] };
+		}
 	}
 }
 
-for my $train ( values %map ) {
+for my $trains ( values %map ) {
+	for my $train ( @{$trains} ) {
 
-	if ( not $train->{details} ) {
-		next;
-	}
+		if ( not $train->{details} ) {
+			next;
+		}
 
-	my @details = @{ $train->{details} // [] };
-	my %common_setup;
-	my @setups;
+		my @details = @{ $train->{details} // [] };
+		my %common_setup;
+		my @setups;
 
-	for my $line (@details) {
-		my %setup;
-		if ( $line =~ m{ Tfz1:(\d+) }x ) {
-			$setup{series} = $1;
-		}
-		if ( $line =~ m{ Tfz2:(\d+) }x ) {
-			$setup{series2} = $1;
-		}
-		if ( $line =~ m{ Hg(\d+) }x ) {
-			$setup{vmax} = 0 + $1;
-		}
-		if ( $line =~ m{ BrH(\d+) }x ) {
-			$setup{brakingPercentage} = 0 + $1;
-		}
-		if ( $line =~ m{ (\d+)m }x ) {
-			$setup{length} = 0 + $1;
-		}
-		if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) {
-			my $station = $1;
-			$station =~ s{\s+$}{};
-			my $special = ( $station =~ s{^[+]}{} );
-			if ( $ds100_to_name{$station} ) {
-				$station = $ds100_to_name{$station};
+		for my $line (@details) {
+			my %setup;
+			if ( $line =~ m{ Tfz1:(\d+) }x ) {
+				$setup{series} = $1;
+			}
+			if ( $line =~ m{ Tfz2:(\d+) }x ) {
+				$setup{series2} = $1;
+			}
+			if ( $line =~ m{ Hg(\d+) }x ) {
+				$setup{vmax} = 0 + $1;
 			}
-			if ($special) {
-				$station = "+${station}";
+			if ( $line =~ m{ BrH(\d+) }x ) {
+				$setup{brakingPercentage} = 0 + $1;
 			}
-			$setup{station} = $station;
+			if ( $line =~ m{ (\d+)m }x ) {
+				$setup{length} = 0 + $1;
+			}
+			if ( %setup and $line =~ m{ ^ \s* ([+]? [A-Z ]{1,7}) }x ) {
+				my $station = $1;
+				$station =~ s{\s+$}{};
+				my $special = ( $station =~ s{^[+]}{} );
+				if ( $ds100_to_name{$station} ) {
+					$station = $ds100_to_name{$station};
+				}
+				if ($special) {
+					$station = "+${station}";
+				}
+				$setup{station} = $station;
+			}
+			push( @setups, {%setup} );
 		}
-		push( @setups, {%setup} );
-	}
 
-	for my $key (qw(brakingPercentage length series series2 vmax)) {
-		if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups ) == 1 ) {
-			$common_setup{$key} = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0];
+		for my $key (qw(brakingPercentage length series series2 vmax)) {
+			if ( ( my @uniq = uniq grep { $_ } map { $_->{$key} } @setups )
+				== 1 )
+			{
+				$common_setup{$key}
+				  = $key =~ m{series} ? $uniq[0] : 0 + $uniq[0];
+			}
 		}
-	}
 
-	delete $train->{details};
-	if (%common_setup) {
-		$train->{commonAttr} = {%common_setup};
+		delete $train->{details};
+		if (%common_setup) {
+			$train->{commonAttr} = {%common_setup};
+		}
+		$train->{attrVariants} = [@setups];
 	}
-	$train->{attrVariants} = [@setups];
 }
 
-for my $train_number ( keys %map ) {
-	my $wagon_numbers_ok = 1;
-	my $wagon_types_ok   = 1;
-	my $wagon_list_ok    = 1;
-	my %bincount;
-	my %type_by_wagon_number;
-	for my $wagon ( @{ $wagon_map{$train_number} // [] } ) {
-		my ( $wagon_type, $wagon_number ) = @{$wagon};
-		if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
-
-			# Locomotives do not have a wagon number. That's OK.
-			next;
-		}
-		elsif ( not $wagon_number ) {
-			$wagon_numbers_ok = 0;
-		}
-		else {
-			$bincount{$wagon_number}++;
-			push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type );
-		}
-	}
-	if ($wagon_numbers_ok) {
-		for my $wagon ( @{ $wagon_map{$train_number} // [] } ) {
+for my $trains ( values %map ) {
+	for my $train ( @{$trains} ) {
+		my $wagon_numbers_ok = 1;
+		my $wagon_types_ok   = 1;
+		my $wagon_list_ok    = 1;
+		my %bincount;
+		my %type_by_wagon_number;
+		for my $wagon ( @{ $train->{raw_wagons} // [] } ) {
 			my ( $wagon_type, $wagon_number ) = @{$wagon};
+			if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
 
-			my $json_wagon = { type => $wagon_type };
-			if ($wagon_number) {
-				$json_wagon->{number} = 0 + $wagon_number;
+				# Locomotives do not have a wagon number. That's OK.
+				next;
+			}
+			elsif ( not $wagon_number ) {
+				$wagon_numbers_ok = 0;
 			}
+			else {
+				$bincount{$wagon_number}++;
+				push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type );
+			}
+		}
+		if ($wagon_numbers_ok) {
+			for my $wagon ( @{ $train->{raw_wagons} // [] } ) {
+				my ( $wagon_type, $wagon_number ) = @{$wagon};
 
-			push( @{ $map{$train_number}{wagons} }, $json_wagon );
+				my $json_wagon = { type => $wagon_type };
+				if ($wagon_number) {
+					$json_wagon->{number} = 0 + $wagon_number;
+				}
 
-			if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
-				next;
-			}
+				push( @{ $train->{wagons} }, $json_wagon );
+
+				if ( $wagon_type =~ m{ 40[1-9] | 14[67] }x ) {
+					next;
+				}
 
-			if ( $bincount{$wagon_number} > 1 ) {
-				$wagon_list_ok = 0;
-				if ( $type_by_wagon_number{$wagon_number}[0] ne
-					$type_by_wagon_number{$wagon_number}[1] )
-				{
-					$wagon_types_ok = 0;
+				if ( $bincount{$wagon_number} > 1 ) {
+					$wagon_list_ok = 0;
+					if ( $type_by_wagon_number{$wagon_number}[0] ne
+						$type_by_wagon_number{$wagon_number}[1] )
+					{
+						$wagon_types_ok = 0;
+					}
 				}
 			}
 		}
-	}
-	if ( not $wagon_types_ok ) {
+		if ( not $wagon_types_ok ) {
 
-		# train type may differ depending on date of week or similar. We don't
-		# parse/handle that yet.
-		$map{$train_number}{type} = $map{$train_number}{rawType};
-		delete $map{$train_number}{shortType};
-		delete $map{$train_number}{wagons};
-		delete $map{$train_number}{hasWagon};
-	}
-	elsif ( not $wagon_list_ok ) {
+          # train type may differ depending on date of week or similar. We don't
+          # parse/handle that yet.
+			$train->{type} = $train->{rawType};
+			delete $train->{shortType};
+			delete $train->{wagons};
+			delete $train->{hasWagon};
+		}
+		elsif ( not $wagon_list_ok ) {
 
-		# train type appears to be correct, but the list contains duplicates
-		# (e.g. due to unhandled schedule variants)
-		delete $map{$train_number}{wagons};
+			# train type appears to be correct, but the list contains duplicates
+			# (e.g. due to unhandled schedule variants)
+			delete $train->{wagons};
+		}
 	}
 }
 
 # Cleanup
 
-for my $train ( values %map ) {
-	delete $train->{wagonorder_notes};
+for my $trains ( values %map ) {
+	for my $train ( @{$trains} ) {
+		delete $train->{raw_wagons};
+		delete $train->{wagonorder_notes};
+	}
 }
 
-# broken umlauf (indentation changes on page break)
-delete $map{104}{cycle};
-delete $map{1588}{cycle};
-delete $map{1700}{cycle};
-delete $map{77689}{cycle};
-delete $map{78112}{cycle};
-
-# indentation change between consecutive lines. wat.
-delete $map{939}{cycle};
-delete $map{2310}{cycle};
+for my $bork (qw(104 1588 1700 77689 78112 939 2310)) {
+	for my $train ( @{ $map{$bork} } ) {
+		delete $train->{cycle};
+	}
+}
 
 # use canonical output (i.e., sort hash keys) to allow for easy diffing.
 say JSON->new->utf8->canonical->encode(
diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot
index 1ac2c2e..860c962 100755
--- a/bin/umlauf-to-dot
+++ b/bin/umlauf-to-dot
@@ -67,9 +67,11 @@ sub build_cycle {
 
 		my @candidates;
 
-		if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
-			push( @candidates, @{ $c->{from} // [] } );
-			push( @candidates, @{ $c->{to}   // [] } );
+		for my $train ( @{ $map->{$train_number} } ) {
+			if ( my $c = $train->{cycle}{$cycle_id} ) {
+				push( @candidates, @{ $c->{from} // [] } );
+				push( @candidates, @{ $c->{to}   // [] } );
+			}
 		}
 
 		@candidates = uniq @candidates;
@@ -87,29 +89,39 @@ sub build_cycle {
 	my @output;
 
 	for my $train_number (@train_numbers) {
-		if ( my $c = $map->{$train_number}{cycle}{$cycle_id} ) {
-			for my $from ( @{ $c->{from} // [] } ) {
-				push(
-					@output,
-					sprintf( "%s -> %s;",
-						format_train( $from,         $map->{$from} ),
-						format_train( $train_number, $map->{$train_number} ) )
-				);
-			}
-			for my $to ( @{ $c->{to} // [] } ) {
-				push(
-					@output,
-					sprintf( "%s -> %s;",
-						format_train( $train_number, $map->{$train_number} ),
-						format_train( $to,           $map->{$to} ) )
-				);
+		for my $train ( @{ $map->{$train_number} } ) {
+			if ( my $c = $train->{cycle}{$cycle_id} ) {
+				for my $from ( @{ $c->{from} // [] } ) {
+					push(
+						@output,
+						sprintf(
+							"%s -> %s;",
+							format_train( $from, $map->{$from}[0] ),
+							format_train(
+								$train_number, $map->{$train_number}[0]
+							)
+						)
+					);
+				}
+				for my $to ( @{ $c->{to} // [] } ) {
+					push(
+						@output,
+						sprintf(
+							"%s -> %s;",
+							format_train(
+								$train_number, $map->{$train_number}[0]
+							),
+							format_train( $to, $map->{$to}[0] )
+						)
+					);
+				}
 			}
 		}
 		if ( $train_number != $line ) {
 			push(
 				@output,
 				sprintf( "%s [shape=box];",
-					format_train( $train_number, $map->{$train_number} ) )
+					format_train( $train_number, $map->{$train_number}[0] ) )
 			);
 		}
 	}
@@ -117,7 +129,11 @@ sub build_cycle {
 	return @output;
 }
 
-my @cycle_ids = keys %{ $map->{$line}{cycle} // {} };
+my @cycle_ids;
+
+for my $train ( @{ $map->{$line} } ) {
+	push( @cycle_ids, keys %{ $train->{cycle} // {} } );
+}
 
 say "digraph Umlauf {";
 
@@ -125,6 +141,6 @@ for my $cycle_id (@cycle_ids) {
 	say join( "\n", uniq build_cycle( $line, $cycle_id ) );
 }
 
-printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) );
+printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) );
 
 say "}"
diff --git a/schema.yml b/schema.yml
index 60f5b15..0cbc85d 100644
--- a/schema.yml
+++ b/schema.yml
@@ -40,7 +40,9 @@ components:
           type: object
           description: dict mapping train numbers to train objects
           additionalProperties:
-            $ref: '#/components/schemas/train'
+            type: array
+            items:
+              $ref: '#/components/schemas/train'
     train:
       type: object
       properties:
-- 
cgit v1.2.3


From 985001e8b4fff4b2c7dcf3ebd8d26a178e1cf561 Mon Sep 17 00:00:00 2001
From: Daniel Friesel <derf@finalrewind.org>
Date: Mon, 11 Jan 2021 22:09:32 +0100
Subject: filter train duplicates by valid date range

---
 bin/db-zugbildung-to-json | 74 +++++++++++++++++++++++++++++++++++++++++++----
 bin/umlauf-to-dot         | 60 +++++++++++++++-----------------------
 schema.yml                |  7 ++++-
 3 files changed, 99 insertions(+), 42 deletions(-)

diff --git a/bin/db-zugbildung-to-json b/bin/db-zugbildung-to-json
index 4721d31..6159a0e 100755
--- a/bin/db-zugbildung-to-json
+++ b/bin/db-zugbildung-to-json
@@ -8,6 +8,8 @@ use warnings;
 use 5.020;
 use utf8;
 
+use DateTime;
+use DateTime::Format::Strptime;
 use Encode qw(decode);
 use File::Slurp qw(write_file);
 use IPC::Run3;
@@ -25,6 +27,32 @@ sub show_usage {
 	exit $exit_code;
 }
 
+my $strp = DateTime::Format::Strptime->new(
+	pattern   => '%F',
+	time_zone => 'Europe/Berlin'
+);
+my $now = DateTime->now( time_zone => 'Europe/Berlin' );
+
+sub range_is_today {
+	my ($range) = @_;
+
+	if ( $range =~ m{^(.*)/(.*)$} ) {
+		my $dt1 = $strp->parse_datetime($1);
+		my $dt2 = $strp->parse_datetime($2);
+		if ( $dt1 and $dt2 and $dt1->epoch < $now->epoch < $dt2->epoch ) {
+			return 1;
+		}
+	}
+	else {
+		if ( my $dt = $strp->parse_datetime($range) ) {
+			if ( $dt->ymd eq $now->ymd ) {
+				return 1;
+			}
+		}
+	}
+	return;
+}
+
 my @weekdays = (qw(Mo Di Mi Do Fr Sa So));
 my @months   = (qw(0 I II III IV V VI VII VIII IX X XI XII));
 my %weekday  = map { ( $weekdays[$_] => $_ ) } ( 0 .. $#weekdays );
@@ -193,7 +221,7 @@ sub parse_condition {
 	my %ret;
 
 	if ( $line
-		=~ m{ ^ \s* (?<from> .*? ) \s - \s (?<to> [^,]+ ) , \s* (?<weekdays> (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (?<dates> .* ) $ }x
+		=~ m{ ^ \s* (?<from> .*? ) \s - \s (?<to> [^,]+ ) , \s* (?<unknown> N \s* )? (?<weekdays> (Mo|Di|Mi|Do|Fr|Sa|So|tgl[.]|[+]|-)+) \s* (?<dates> .* ) $ }x
 	  )
 	{
 		$ret{from}     = $+{from};
@@ -726,12 +754,48 @@ for my $bork (qw(104 1588 1700 77689 78112 939 2310)) {
 	}
 }
 
+my %smap;
+for my $train_no ( keys %map ) {
+	if ( @{ $map{$train_no} } == 1 ) {
+		$smap{$train_no} = $map{$train_no}[0];
+	}
+	else {
+		my $latest_valid;
+		my $valid_count = 0;
+		for my $train ( @{ $map{$train_no} } ) {
+			my $is_valid = 0;
+			for my $schedule ( @{ $train->{schedules} // [] } ) {
+				for my $valid ( @{ $schedule->{valid} // [] } ) {
+					if ( range_is_today($valid) ) {
+						$is_valid = 1;
+					}
+				}
+
+				# invalid may override valid for certain days
+				for my $invalid ( @{ $schedule->{invalid} // [] } ) {
+					if ( range_is_today($invalid) ) {
+						$is_valid = 0;
+					}
+				}
+			}
+			if ($is_valid) {
+				$latest_valid = $train;
+				$valid_count++;
+			}
+		}
+		if ( $valid_count == 1 ) {
+			$smap{$train_no} = $latest_valid;
+		}
+	}
+}
+
 # use canonical output (i.e., sort hash keys) to allow for easy diffing.
 say JSON->new->utf8->canonical->encode(
 	{
-		deprecated => \0,
-		source     => $wr_name,
-		train      => {%map},
-		valid      => $valid,
+		deprecated     => \0,
+		source         => $wr_name,
+		train          => {%smap},
+		train_variants => {%map},
+		valid          => $valid,
 	}
 );
diff --git a/bin/umlauf-to-dot b/bin/umlauf-to-dot
index 860c962..47a766c 100755
--- a/bin/umlauf-to-dot
+++ b/bin/umlauf-to-dot
@@ -67,11 +67,10 @@ sub build_cycle {
 
 		my @candidates;
 
-		for my $train ( @{ $map->{$train_number} } ) {
-			if ( my $c = $train->{cycle}{$cycle_id} ) {
-				push( @candidates, @{ $c->{from} // [] } );
-				push( @candidates, @{ $c->{to}   // [] } );
-			}
+		my $train = $map->{$train_number};
+		if ( my $c = $train->{cycle}{$cycle_id} ) {
+			push( @candidates, @{ $c->{from} // [] } );
+			push( @candidates, @{ $c->{to}   // [] } );
 		}
 
 		@candidates = uniq @candidates;
@@ -89,39 +88,30 @@ sub build_cycle {
 	my @output;
 
 	for my $train_number (@train_numbers) {
-		for my $train ( @{ $map->{$train_number} } ) {
-			if ( my $c = $train->{cycle}{$cycle_id} ) {
-				for my $from ( @{ $c->{from} // [] } ) {
-					push(
-						@output,
-						sprintf(
-							"%s -> %s;",
-							format_train( $from, $map->{$from}[0] ),
-							format_train(
-								$train_number, $map->{$train_number}[0]
-							)
-						)
-					);
-				}
-				for my $to ( @{ $c->{to} // [] } ) {
-					push(
-						@output,
-						sprintf(
-							"%s -> %s;",
-							format_train(
-								$train_number, $map->{$train_number}[0]
-							),
-							format_train( $to, $map->{$to}[0] )
-						)
-					);
-				}
+		my $train = $map->{$train_number};
+		if ( my $c = $train->{cycle}{$cycle_id} ) {
+			for my $from ( @{ $c->{from} // [] } ) {
+				push(
+					@output,
+					sprintf( "%s -> %s;",
+						format_train( $from,         $map->{$from} ),
+						format_train( $train_number, $map->{$train_number} ) )
+				);
+			}
+			for my $to ( @{ $c->{to} // [] } ) {
+				push(
+					@output,
+					sprintf( "%s -> %s;",
+						format_train( $train_number, $map->{$train_number} ),
+						format_train( $to,           $map->{$to} ) )
+				);
 			}
 		}
 		if ( $train_number != $line ) {
 			push(
 				@output,
 				sprintf( "%s [shape=box];",
-					format_train( $train_number, $map->{$train_number}[0] ) )
+					format_train( $train_number, $map->{$train_number} ) )
 			);
 		}
 	}
@@ -131,9 +121,7 @@ sub build_cycle {
 
 my @cycle_ids;
 
-for my $train ( @{ $map->{$line} } ) {
-	push( @cycle_ids, keys %{ $train->{cycle} // {} } );
-}
+push( @cycle_ids, keys %{ $map->{$line}{cycle} // {} } );
 
 say "digraph Umlauf {";
 
@@ -141,6 +129,6 @@ for my $cycle_id (@cycle_ids) {
 	say join( "\n", uniq build_cycle( $line, $cycle_id ) );
 }
 
-printf( "%s [style=bold];\n", format_train( $line, $map->{$line}[0] ) );
+printf( "%s [style=bold];\n", format_train( $line, $map->{$line} ) );
 
 say "}"
diff --git a/schema.yml b/schema.yml
index 0cbc85d..b7542f9 100644
--- a/schema.yml
+++ b/schema.yml
@@ -38,7 +38,12 @@ components:
           description: ISO 8601 interval describing when this train composition dataset is valid
         train:
           type: object
-          description: dict mapping train numbers to train objects
+          description: dict mapping train numbers to probably valid train objects
+          additionalProperties:
+            $ref: '#/components/schemas/train'
+        train_variants:
+          type: object
+          description: dict mapping train numbers to list of possible train objects
           additionalProperties:
             type: array
             items:
-- 
cgit v1.2.3