diff options
Diffstat (limited to 'share/xml2json')
-rwxr-xr-x | share/xml2json | 289 |
1 files changed, 260 insertions, 29 deletions
diff --git a/share/xml2json b/share/xml2json index 4429245..1ca57b7 100755 --- a/share/xml2json +++ b/share/xml2json @@ -6,58 +6,289 @@ use 5.010; use File::Slurp qw(read_file write_file); use JSON; +use List::Util qw(any); use XML::LibXML; +# old / new / renamed / reappeared / missing +my $mode = shift; +my $extra = shift // q{}; + +if ( $extra eq 'if-active' ) { + use Travel::Status::DE::IRIS; +} + +my $json = JSON->new->utf8; my $json_str = read_file('stations.json'); -my $stations = JSON->new->utf8->decode($json_str); -@{$stations} = sort { $a->{name} cmp $b->{name} } @{$stations}; +my $stations = $json->decode($json_str); +@{$stations} + = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} } @{$stations}; + +$json_str = read_file('old_stations.json'); +my $old_stations = $json->decode($json_str); +@{$old_stations} = sort { $a->{name} cmp $b->{name} or $a->{eva} <=> $b->{eva} } + @{$old_stations}; + +my %station_by_ds100; +for my $station ( @{$stations} ) { + $station_by_ds100{ $station->{ds100} } = $station; +} + +my %old_station_by_ds100; +for my $old_station ( @{$old_stations} ) { + $old_station_by_ds100{ $old_station->{ds100} } = $old_station; +} + +my %station_by_eva; +for my $station ( @{$stations} ) { + $station_by_eva{ $station->{eva} } = $station; +} + +my %old_station_by_eva; +for my $old_station ( @{$old_stations} ) { + $old_station_by_eva{ $old_station->{eva} } = $old_station; +} + +my %station_by_name; +for my $station ( @{$stations} ) { + push( @{ $station_by_name{ $station->{name} } }, $station ); +} + +my %old_station_by_name; +for my $old_station ( @{$old_stations} ) { + push( @{ $old_station_by_name{ $old_station->{name} } }, $old_station ); +} + +my %xml_by_ds100; +my %xml_by_eva; +my %xml_by_name; my $xml_str = read_file('stations.xml'); my $tree = XML::LibXML->load_xml( string => $xml_str ); +my @missing; + +sub add_station { + my ( $name, $ds100, $eva ) = @_; + my $station = { + name => $name, + ds100 => $ds100, + eva => $eva, + }; + push( @{$stations}, $station ); + $station_by_eva{$eva} = $station; + $station_by_ds100{$ds100} = $station; + push( @{ $station_by_name{$name} }, $station ); +} + +sub add_old_station { + my ( $name, $ds100, $eva, $latlong ) = @_; + my $station = { + name => $name, + ds100 => $ds100, + eva => $eva, + latlong => $latlong + }; + push( @{$old_stations}, $station ); + $old_station_by_eva{$eva} = $station; + $old_station_by_ds100{$ds100} = $station; + push( @{ $old_station_by_name{$name} }, $station ); +} + +sub delete_station { + my ( $name, $ds100, $eva ) = @_; + delete $station_by_name{$name}; + delete $station_by_ds100{$ds100}; + delete $station_by_eva{$eva}; + @{$stations} = grep { + $_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva + } @{$stations}; +} + +sub delete_old_station { + my ( $name, $ds100, $eva ) = @_; + delete $old_station_by_name{$name}; + delete $old_station_by_ds100{$ds100}; + delete $old_station_by_eva{$eva}; + @{$old_stations} = grep { + $_->{name} ne $name and $_->{ds100} ne $ds100 and $_->{eva} != $eva + } @{$old_stations}; +} + +sub has_departures { + my ($eva) = @_; + + my $status = Travel::Status::DE::IRIS->new( + station => $eva, + lookbehind => 120, + lookahead => 240 + ); + + if ( $status->results ) { + return 1; + } + return; +} + +# TODO falls ein eintrag aus old_stations im XML auftaucht sollte er aus old_stations raus und wieder in stations rein + +if ( -e 'missing.txt' ) { + for my $line ( read_file('missing.txt') ) { + chomp $line; + push( @missing, $line ); + } +} + +my %renamed; +if ( -e 'renamed.json' ) { + $json_str = read_file('renamed.json'); + %renamed = %{ $json->decode($json_str) }; +} + for my $station ( $tree->findnodes('//station') ) { my $name = $station->getAttribute('name'); my $eva = $station->getAttribute('eva'); my $ds100 = $station->getAttribute('ds100'); + my $is_db = $station->getAttribute('db') eq 'true'; - my $found = 0; + my $xml_station = { + name => $name, + eva => $eva, + ds100 => $ds100, + is_db => $is_db, + }; + $xml_by_ds100{$ds100} = $xml_station; + $xml_by_eva{$eva} = $xml_station; - for my $j_station ( @{$stations} ) { - my $j_name = $j_station->{name}; - my $j_ds100 = $j_station->{ds100}; - my $j_eva = $j_station->{eva}; + if ( exists $xml_by_eva{$name} ) { + push( @{ $xml_by_name{$name}{extra} }, $xml_station ); + } + else { + $xml_by_name{$name} = $xml_station; + } +} - if ( $name eq $j_name or $eva == $j_eva ) { - $found = 1; - } +for my $station ( $tree->findnodes('//station') ) { + my $name = $station->getAttribute('name'); + my $eva = $station->getAttribute('eva'); + my $ds100 = $station->getAttribute('ds100'); + my $is_db = $station->getAttribute('db') eq 'true'; - if ( $j_ds100 eq $ds100 and $j_name ne $name ) { - printf( "%8s has been renamed: %30s -> %30s\n", - $ds100, $j_name, $name ); - last; + my $found = 0; + + if ( $station_by_name{$name} or $station_by_eva{$eva} ) { + $found = 1; + } + + if ( $station_by_ds100{$ds100} + and $station_by_ds100{$ds100}{name} ne $name ) + { + printf( "%8s has been renamed: %30s -> %30s\n", + $ds100, $station_by_ds100{$ds100}{name}, $name ); + if ( not $mode or $mode eq 'renamed' ) { + $renamed{ $station_by_ds100{$ds100}{name} } = $name; + $station_by_ds100{$ds100}{name} = $name; } - elsif ( $j_eva == $eva and $j_name ne $name ) { - printf( "%d mismatch: (%s -> %s), (%s -> %s)\n", - $eva, $j_name, $name, $j_ds100, $ds100 ); - last; + } + elsif ( $station_by_eva{$eva} and $station_by_eva{$eva}{name} ne $name ) { + printf( + "%d mismatch: (%s -> %s), (%s -> %s)\n", + $eva, $station_by_eva{$eva}{name}, + $name, $station_by_eva{$eva}{ds100}, $ds100 + ); + } + elsif ( $station_by_name{$name} + and not any { $_->{ds100} eq $ds100 } @{ $station_by_name{$name} } + and $is_db ) + { + printf( "%30s has a new DS100 alias: %8s\n", $name, $ds100 ); + if ( not $mode or $mode eq 'new' ) { + add_station( $name, $ds100, $eva ); } - elsif ( $j_name eq $name and $j_ds100 ne $ds100 ) { - printf( "%30s has been recoded: %8s -> %8s\n", - $name, $j_ds100, $ds100 ); - last; + } + elsif ( $station_by_name{$name} + and not any { $_->{eva} == $eva } @{ $station_by_name{$name} } + and $is_db ) + { + printf( "%30s has a new EVA alias: %d\n", $name, $eva ); + if ( not $mode or $mode eq 'new' ) { + add_station( $name, $ds100, $eva ); } - elsif ( $j_name eq $name and $j_eva != $eva ) { - printf( "%30s has been recoded: %d -> %d\n", $name, $j_eva, $eva ); - last; + } + + if ( + $name !~ m{Betriebsstelle nicht bekannt} + and my $old = ( + $old_station_by_name{$name} // $old_station_by_ds100{$ds100} + // $old_station_by_eva{$eva} + ) + ) + { + printf( "%30s has re-appeared as %s %d\n", $name, $ds100, $eva ); + if ( not $mode or $mode eq 'reappeared' ) { + if ( $extra ne 'if-active' or has_departures($eva) ) { + if ( ref($old) eq 'ARRAY' ) { + for my $o ( @{$old} ) { + delete_old_station( $o->{name}, $o->{ds100}, + $o->{eva} ); + } + } + else { + delete_old_station( $old->{name}, $old->{ds100}, + $old->{eva} ); + } + add_station( $name, $ds100, $eva ); + } + if ( $extra eq 'if-active' ) { + sleep(1); + } } } - if ( not $found - and not( $ds100 =~ m{ ^ [OPXZ] }x or $name =~ m{ ^ Bahnhof, }x ) ) + if ( not $found + and any { $_ eq $name or $_ eq $ds100 } @missing ) { - #say "missing $eva $ds100 \"$name\""; + say "missing $eva $ds100 \"$name\""; + + if ( not $mode or $mode eq 'missing' ) { + add_station( $name, $ds100, $eva ); + if ( $old_station_by_name{$name} ) { + delete_old_station( $name, $ds100, $eva ); + } + } + } +} + +my @to_delete; + +for my $i ( 0 .. $#{$stations} ) { + $stations->[$i]{eva} = 0 + $stations->[$i]{eva}; + my $j_station = $stations->[$i]; + my $j_name = $j_station->{name}; + my $j_ds100 = $j_station->{ds100}; + my $j_eva = $j_station->{eva}; + + if ( not( $xml_by_name{$j_name} or $xml_by_eva{$j_eva} ) ) { + say "station no longer exists: $j_eva $j_ds100 \"$j_name\""; + if ( not $mode or $mode eq 'old' ) { + unshift( @to_delete, $i ); + add_old_station( $j_name, $j_ds100, $j_eva, $j_station->{latlong} ); + } } } -my $json_out = JSON->new->utf8->canonical->pretty->encode($stations); +for my $i ( 0 .. $#{$old_stations} ) { + $old_stations->[$i]{eva} = 0 + $old_stations->[$i]{eva}; +} + +for my $i (@to_delete) { + splice( @{$stations}, $i, 1 ); +} + +my $json_out = $json->canonical->pretty->encode($stations); write_file( 'stations.json', $json_out ); + +$json_out = $json->canonical->pretty->encode($old_stations); +write_file( 'old_stations.json', $json_out ); + +$json_out = $json->encode( \%renamed ); +write_file( 'renamed.json', $json_out ); |