summaryrefslogtreecommitdiff
path: root/share/xml2json
diff options
context:
space:
mode:
Diffstat (limited to 'share/xml2json')
-rwxr-xr-xshare/xml2json294
1 files changed, 294 insertions, 0 deletions
diff --git a/share/xml2json b/share/xml2json
new file mode 100755
index 0000000..1ca57b7
--- /dev/null
+++ b/share/xml2json
@@ -0,0 +1,294 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use 5.010;
+
+use File::Slurp qw(read_file write_file);
+use JSON;
+use List::Util qw(any);
+use XML::LibXML;
+
+# old / new / renamed / reappeared / missing
+my $mode = shift;
+my $extra = shift // q{};
+
+if ( $extra eq 'if-active' ) {
+ use Travel::Status::DE::IRIS;
+}
+
+my $json = JSON->new->utf8;
+my $json_str = read_file('stations.json');
+my $stations = $json->decode($json_str);
+@{$stations}
+ = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} } @{$stations};
+
+$json_str = read_file('old_stations.json');
+my $old_stations = $json->decode($json_str);
+@{$old_stations} = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} }
+ @{$old_stations};
+
+my %station_by_ds100;
+for my $station ( @{$stations} ) {
+ $station_by_ds100{ $station->{ds100} } = $station;
+}
+
+my %old_station_by_ds100;
+for my $old_station ( @{$old_stations} ) {
+ $old_station_by_ds100{ $old_station->{ds100} } = $old_station;
+}
+
+my %station_by_eva;
+for my $station ( @{$stations} ) {
+ $station_by_eva{ $station->{eva} } = $station;
+}
+
+my %old_station_by_eva;
+for my $old_station ( @{$old_stations} ) {
+ $old_station_by_eva{ $old_station->{eva} } = $old_station;
+}
+
+my %station_by_name;
+for my $station ( @{$stations} ) {
+ push( @{ $station_by_name{ $station->{name} } }, $station );
+}
+
+my %old_station_by_name;
+for my $old_station ( @{$old_stations} ) {
+ push( @{ $old_station_by_name{ $old_station->{name} } }, $old_station );
+}
+
+my %xml_by_ds100;
+my %xml_by_eva;
+my %xml_by_name;
+
+my $xml_str = read_file('stations.xml');
+my $tree = XML::LibXML->load_xml( string => $xml_str );
+
+my @missing;
+
+sub add_station {
+ my ( $name, $ds100, $eva ) = @_;
+ my $station = {
+ name => $name,
+ ds100 => $ds100,
+ eva => $eva,
+ };
+ push( @{$stations}, $station );
+ $station_by_eva{$eva} = $station;
+ $station_by_ds100{$ds100} = $station;
+ push( @{ $station_by_name{$name} }, $station );
+}
+
+sub add_old_station {
+ my ( $name, $ds100, $eva, $latlong ) = @_;
+ my $station = {
+ name => $name,
+ ds100 => $ds100,
+ eva => $eva,
+ latlong => $latlong
+ };
+ push( @{$old_stations}, $station );
+ $old_station_by_eva{$eva} = $station;
+ $old_station_by_ds100{$ds100} = $station;
+ push( @{ $old_station_by_name{$name} }, $station );
+}
+
+sub delete_station {
+ my ( $name, $ds100, $eva ) = @_;
+ delete $station_by_name{$name};
+ delete $station_by_ds100{$ds100};
+ delete $station_by_eva{$eva};
+ @{$stations} = grep {
+ $_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva
+ } @{$stations};
+}
+
+sub delete_old_station {
+ my ( $name, $ds100, $eva ) = @_;
+ delete $old_station_by_name{$name};
+ delete $old_station_by_ds100{$ds100};
+ delete $old_station_by_eva{$eva};
+ @{$old_stations} = grep {
+ $_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva
+ } @{$old_stations};
+}
+
+sub has_departures {
+ my ($eva) = @_;
+
+ my $status = Travel::Status::DE::IRIS->new(
+ station => $eva,
+ lookbehind => 120,
+ lookahead => 240
+ );
+
+ if ( $status->results ) {
+ return 1;
+ }
+ return;
+}
+
+# TODO falls ein eintrag aus old_stations im XML auftaucht sollte er aus old_stations raus und wieder in stations rein
+
+if ( -e 'missing.txt' ) {
+ for my $line ( read_file('missing.txt') ) {
+ chomp $line;
+ push( @missing, $line );
+ }
+}
+
+my %renamed;
+if ( -e 'renamed.json' ) {
+ $json_str = read_file('renamed.json');
+ %renamed = %{ $json->decode($json_str) };
+}
+
+for my $station ( $tree->findnodes('//station') ) {
+ my $name = $station->getAttribute('name');
+ my $eva = $station->getAttribute('eva');
+ my $ds100 = $station->getAttribute('ds100');
+ my $is_db = $station->getAttribute('db') eq 'true';
+
+ my $xml_station = {
+ name => $name,
+ eva => $eva,
+ ds100 => $ds100,
+ is_db => $is_db,
+ };
+ $xml_by_ds100{$ds100} = $xml_station;
+ $xml_by_eva{$eva} = $xml_station;
+
+ if ( exists $xml_by_eva{$name} ) {
+ push( @{ $xml_by_name{$name}{extra} }, $xml_station );
+ }
+ else {
+ $xml_by_name{$name} = $xml_station;
+ }
+}
+
+for my $station ( $tree->findnodes('//station') ) {
+ my $name = $station->getAttribute('name');
+ my $eva = $station->getAttribute('eva');
+ my $ds100 = $station->getAttribute('ds100');
+ my $is_db = $station->getAttribute('db') eq 'true';
+
+ my $found = 0;
+
+ if ( $station_by_name{$name} or $station_by_eva{$eva} ) {
+ $found = 1;
+ }
+
+ if ( $station_by_ds100{$ds100}
+ and $station_by_ds100{$ds100}{name} ne $name )
+ {
+ printf( "%8s has been renamed: %30s -> %30s\n",
+ $ds100, $station_by_ds100{$ds100}{name}, $name );
+ if ( not $mode or $mode eq 'renamed' ) {
+ $renamed{ $station_by_ds100{$ds100}{name} } = $name;
+ $station_by_ds100{$ds100}{name} = $name;
+ }
+ }
+ elsif ( $station_by_eva{$eva} and $station_by_eva{$eva}{name} ne $name ) {
+ printf(
+ "%d mismatch: (%s -> %s), (%s -> %s)\n",
+ $eva, $station_by_eva{$eva}{name},
+ $name, $station_by_eva{$eva}{ds100}, $ds100
+ );
+ }
+ elsif ( $station_by_name{$name}
+ and not any { $_->{ds100} eq $ds100 } @{ $station_by_name{$name} }
+ and $is_db )
+ {
+ printf( "%30s has a new DS100 alias: %8s\n", $name, $ds100 );
+ if ( not $mode or $mode eq 'new' ) {
+ add_station( $name, $ds100, $eva );
+ }
+ }
+ elsif ( $station_by_name{$name}
+ and not any { $_->{eva} == $eva } @{ $station_by_name{$name} }
+ and $is_db )
+ {
+ printf( "%30s has a new EVA alias: %d\n", $name, $eva );
+ if ( not $mode or $mode eq 'new' ) {
+ add_station( $name, $ds100, $eva );
+ }
+ }
+
+ if (
+ $name !~ m{Betriebsstelle nicht bekannt}
+ and my $old = (
+ $old_station_by_name{$name} // $old_station_by_ds100{$ds100}
+ // $old_station_by_eva{$eva}
+ )
+ )
+ {
+ printf( "%30s has re-appeared as %s %d\n", $name, $ds100, $eva );
+ if ( not $mode or $mode eq 'reappeared' ) {
+ if ( $extra ne 'if-active' or has_departures($eva) ) {
+ if ( ref($old) eq 'ARRAY' ) {
+ for my $o ( @{$old} ) {
+ delete_old_station( $o->{name}, $o->{ds100},
+ $o->{eva} );
+ }
+ }
+ else {
+ delete_old_station( $old->{name}, $old->{ds100},
+ $old->{eva} );
+ }
+ add_station( $name, $ds100, $eva );
+ }
+ if ( $extra eq 'if-active' ) {
+ sleep(1);
+ }
+ }
+ }
+
+ if ( not $found
+ and any { $_ eq $name or $_ eq $ds100 } @missing )
+ {
+ say "missing $eva $ds100 \"$name\"";
+
+ if ( not $mode or $mode eq 'missing' ) {
+ add_station( $name, $ds100, $eva );
+ if ( $old_station_by_name{$name} ) {
+ delete_old_station( $name, $ds100, $eva );
+ }
+ }
+ }
+}
+
+my @to_delete;
+
+for my $i ( 0 .. $#{$stations} ) {
+ $stations->[$i]{eva} = 0 + $stations->[$i]{eva};
+ my $j_station = $stations->[$i];
+ my $j_name = $j_station->{name};
+ my $j_ds100 = $j_station->{ds100};
+ my $j_eva = $j_station->{eva};
+
+ if ( not( $xml_by_name{$j_name} or $xml_by_eva{$j_eva} ) ) {
+ say "station no longer exists: $j_eva $j_ds100 \"$j_name\"";
+ if ( not $mode or $mode eq 'old' ) {
+ unshift( @to_delete, $i );
+ add_old_station( $j_name, $j_ds100, $j_eva, $j_station->{latlong} );
+ }
+ }
+}
+
+for my $i ( 0 .. $#{$old_stations} ) {
+ $old_stations->[$i]{eva} = 0 + $old_stations->[$i]{eva};
+}
+
+for my $i (@to_delete) {
+ splice( @{$stations}, $i, 1 );
+}
+
+my $json_out = $json->canonical->pretty->encode($stations);
+write_file( 'stations.json', $json_out );
+
+$json_out = $json->canonical->pretty->encode($old_stations);
+write_file( 'old_stations.json', $json_out );
+
+$json_out = $json->encode( \%renamed );
+write_file( 'renamed.json', $json_out );