diff options
Diffstat (limited to 'share/xml2json')
| -rwxr-xr-x | share/xml2json | 294 | 
1 files changed, 294 insertions, 0 deletions
diff --git a/share/xml2json b/share/xml2json new file mode 100755 index 0000000..1ca57b7 --- /dev/null +++ b/share/xml2json @@ -0,0 +1,294 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use 5.010; + +use File::Slurp qw(read_file write_file); +use JSON; +use List::Util qw(any); +use XML::LibXML; + +# old / new / renamed / reappeared / missing +my $mode  = shift; +my $extra = shift // q{}; + +if ( $extra eq 'if-active' ) { +	use Travel::Status::DE::IRIS; +} + +my $json     = JSON->new->utf8; +my $json_str = read_file('stations.json'); +my $stations = $json->decode($json_str); +@{$stations} +  = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} } @{$stations}; + +$json_str = read_file('old_stations.json'); +my $old_stations = $json->decode($json_str); +@{$old_stations} = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} } +  @{$old_stations}; + +my %station_by_ds100; +for my $station ( @{$stations} ) { +	$station_by_ds100{ $station->{ds100} } = $station; +} + +my %old_station_by_ds100; +for my $old_station ( @{$old_stations} ) { +	$old_station_by_ds100{ $old_station->{ds100} } = $old_station; +} + +my %station_by_eva; +for my $station ( @{$stations} ) { +	$station_by_eva{ $station->{eva} } = $station; +} + +my %old_station_by_eva; +for my $old_station ( @{$old_stations} ) { +	$old_station_by_eva{ $old_station->{eva} } = $old_station; +} + +my %station_by_name; +for my $station ( @{$stations} ) { +	push( @{ $station_by_name{ $station->{name} } }, $station ); +} + +my %old_station_by_name; +for my $old_station ( @{$old_stations} ) { +	push( @{ $old_station_by_name{ $old_station->{name} } }, $old_station ); +} + +my %xml_by_ds100; +my %xml_by_eva; +my %xml_by_name; + +my $xml_str = read_file('stations.xml'); +my $tree    = XML::LibXML->load_xml( string => $xml_str ); + +my @missing; + +sub add_station { +	my ( $name, $ds100, $eva ) = @_; +	my $station = { +		name  => $name, +		ds100 => $ds100, +		eva   => $eva, +	}; +	push( @{$stations}, $station ); +	$station_by_eva{$eva}     = $station; +	$station_by_ds100{$ds100} = $station; +	push( @{ $station_by_name{$name} }, $station ); +} + +sub add_old_station { +	my ( $name, $ds100, $eva, $latlong ) = @_; +	my $station = { +		name    => $name, +		ds100   => $ds100, +		eva     => $eva, +		latlong => $latlong +	}; +	push( @{$old_stations}, $station ); +	$old_station_by_eva{$eva}     = $station; +	$old_station_by_ds100{$ds100} = $station; +	push( @{ $old_station_by_name{$name} }, $station ); +} + +sub delete_station { +	my ( $name, $ds100, $eva ) = @_; +	delete $station_by_name{$name}; +	delete $station_by_ds100{$ds100}; +	delete $station_by_eva{$eva}; +	@{$stations} = grep { +		$_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva +	} @{$stations}; +} + +sub delete_old_station { +	my ( $name, $ds100, $eva ) = @_; +	delete $old_station_by_name{$name}; +	delete $old_station_by_ds100{$ds100}; +	delete $old_station_by_eva{$eva}; +	@{$old_stations} = grep { +		$_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva +	} @{$old_stations}; +} + +sub has_departures { +	my ($eva) = @_; + +	my $status = Travel::Status::DE::IRIS->new( +		station    => $eva, +		lookbehind => 120, +		lookahead  => 240 +	); + +	if ( $status->results ) { +		return 1; +	} +	return; +} + +# TODO falls ein eintrag aus old_stations im XML auftaucht sollte er aus old_stations raus und wieder in stations rein + +if ( -e 'missing.txt' ) { +	for my $line ( read_file('missing.txt') ) { +		chomp $line; +		push( @missing, $line ); +	} +} + +my %renamed; +if ( -e 'renamed.json' ) { +	$json_str = read_file('renamed.json'); +	%renamed  = %{ $json->decode($json_str) }; +} + +for my $station ( $tree->findnodes('//station') ) { +	my $name  = $station->getAttribute('name'); +	my $eva   = $station->getAttribute('eva'); +	my $ds100 = $station->getAttribute('ds100'); +	my $is_db = $station->getAttribute('db') eq 'true'; + +	my $xml_station = { +		name  => $name, +		eva   => $eva, +		ds100 => $ds100, +		is_db => $is_db, +	}; +	$xml_by_ds100{$ds100} = $xml_station; +	$xml_by_eva{$eva}     = $xml_station; + +	if ( exists $xml_by_eva{$name} ) { +		push( @{ $xml_by_name{$name}{extra} }, $xml_station ); +	} +	else { +		$xml_by_name{$name} = $xml_station; +	} +} + +for my $station ( $tree->findnodes('//station') ) { +	my $name  = $station->getAttribute('name'); +	my $eva   = $station->getAttribute('eva'); +	my $ds100 = $station->getAttribute('ds100'); +	my $is_db = $station->getAttribute('db') eq 'true'; + +	my $found = 0; + +	if ( $station_by_name{$name} or $station_by_eva{$eva} ) { +		$found = 1; +	} + +	if (    $station_by_ds100{$ds100} +		and $station_by_ds100{$ds100}{name} ne $name ) +	{ +		printf( "%8s has been renamed: %30s -> %30s\n", +			$ds100, $station_by_ds100{$ds100}{name}, $name ); +		if ( not $mode or $mode eq 'renamed' ) { +			$renamed{ $station_by_ds100{$ds100}{name} } = $name; +			$station_by_ds100{$ds100}{name} = $name; +		} +	} +	elsif ( $station_by_eva{$eva} and $station_by_eva{$eva}{name} ne $name ) { +		printf( +			"%d mismatch: (%s -> %s), (%s -> %s)\n", +			$eva,  $station_by_eva{$eva}{name}, +			$name, $station_by_eva{$eva}{ds100}, $ds100 +		); +	} +	elsif ( $station_by_name{$name} +		and not any { $_->{ds100} eq $ds100 } @{ $station_by_name{$name} } +		and $is_db ) +	{ +		printf( "%30s has a new DS100 alias: %8s\n", $name, $ds100 ); +		if ( not $mode or $mode eq 'new' ) { +			add_station( $name, $ds100, $eva ); +		} +	} +	elsif ( $station_by_name{$name} +		and not any { $_->{eva} == $eva } @{ $station_by_name{$name} } +		and $is_db ) +	{ +		printf( "%30s has a new EVA alias: %d\n", $name, $eva ); +		if ( not $mode or $mode eq 'new' ) { +			add_station( $name, $ds100, $eva ); +		} +	} + +	if ( +		$name !~ m{Betriebsstelle nicht bekannt} +		and my $old = ( +			$old_station_by_name{$name} // $old_station_by_ds100{$ds100} +			  // $old_station_by_eva{$eva} +		) +	  ) +	{ +		printf( "%30s has re-appeared as %s %d\n", $name, $ds100, $eva ); +		if ( not $mode or $mode eq 'reappeared' ) { +			if ( $extra ne 'if-active' or has_departures($eva) ) { +				if ( ref($old) eq 'ARRAY' ) { +					for my $o ( @{$old} ) { +						delete_old_station( $o->{name}, $o->{ds100}, +							$o->{eva} ); +					} +				} +				else { +					delete_old_station( $old->{name}, $old->{ds100}, +						$old->{eva} ); +				} +				add_station( $name, $ds100, $eva ); +			} +			if ( $extra eq 'if-active' ) { +				sleep(1); +			} +		} +	} + +	if ( not $found +		and any { $_ eq $name or $_ eq $ds100 } @missing ) +	{ +		say "missing $eva  $ds100  \"$name\""; + +		if ( not $mode or $mode eq 'missing' ) { +			add_station( $name, $ds100, $eva ); +			if ( $old_station_by_name{$name} ) { +				delete_old_station( $name, $ds100, $eva ); +			} +		} +	} +} + +my @to_delete; + +for my $i ( 0 .. $#{$stations} ) { +	$stations->[$i]{eva} = 0 + $stations->[$i]{eva}; +	my $j_station = $stations->[$i]; +	my $j_name    = $j_station->{name}; +	my $j_ds100   = $j_station->{ds100}; +	my $j_eva     = $j_station->{eva}; + +	if ( not( $xml_by_name{$j_name} or $xml_by_eva{$j_eva} ) ) { +		say "station no longer exists: $j_eva  $j_ds100  \"$j_name\""; +		if ( not $mode or $mode eq 'old' ) { +			unshift( @to_delete, $i ); +			add_old_station( $j_name, $j_ds100, $j_eva, $j_station->{latlong} ); +		} +	} +} + +for my $i ( 0 .. $#{$old_stations} ) { +	$old_stations->[$i]{eva} = 0 + $old_stations->[$i]{eva}; +} + +for my $i (@to_delete) { +	splice( @{$stations}, $i, 1 ); +} + +my $json_out = $json->canonical->pretty->encode($stations); +write_file( 'stations.json', $json_out ); + +$json_out = $json->canonical->pretty->encode($old_stations); +write_file( 'old_stations.json', $json_out ); + +$json_out = $json->encode( \%renamed ); +write_file( 'renamed.json', $json_out );  | 
