From 33ce867b1bd4abb8fc9dde049507937182b9c6e3 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 18 Nov 2019 20:09:07 +0100 Subject: csv2json, json2json: Perform consistency checks --- README.md | 6 +++--- share/csv2json | 4 ++-- share/json2json | 30 ++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d53b36b..42cd267 100644 --- a/README.md +++ b/README.md @@ -73,9 +73,9 @@ located in `share/stations.json`. There are two recommended editing methods. Automatic method, e.g. to incorporate changes from Open Data sources: * modify stations.json with a script in any JSON-aware language you like -* run ./json2json in the share diretcory. This transforms stations.json into - its canonical format, which simplifies tracking of changes and reduces diff - size +* run `./json2json` in the share directory. This performs consistency checks and + transforms stations.json into its canonical format, which simplifies tracking + of changes and reduces diff size Manual method: diff --git a/share/csv2json b/share/csv2json index 9e6340c..2bac49f 100755 --- a/share/csv2json +++ b/share/csv2json @@ -50,8 +50,8 @@ for my $line (@csv_lines) { my $have_duplicates = 0; my @names = map { $_->{name} } @stations; -my @ds100 = map { $_->{ds100} } @stations; -my @uic_ids = map { $_->{uic} } @stations; +my @ds100 = map { $_->{ds100} } sort { $a->{ds100} cmp $b->{ds100} } @stations; +my @uic_ids = map { $_->{uic} } sort { $a->{uic} <=> $b->{uic} } @stations; for my $i ( 1 .. 
$#names ) { if ( $names[ $i - 1 ] eq $names[$i] ) { diff --git a/share/json2json b/share/json2json index 5cad10f..189ee2c 100755 --- a/share/json2json +++ b/share/json2json @@ -10,5 +10,35 @@ use JSON; my $json_str = read_file('stations.json'); my $stations = JSON->new->utf8->decode($json_str); @{$stations} = sort { $a->{name} cmp $b->{name} } @{$stations}; + +my $have_duplicates = 0; +my @names = map { $_->{name} } @{$stations}; +my @ds100 = map { $_->{ds100} } sort { $a->{ds100} cmp $b->{ds100} } @{$stations}; +my @uic_ids = map { $_->{uic} } sort { $a->{uic} <=> $b->{uic} } @{$stations}; + +for my $i ( 1 .. $#names ) { + if ( $names[ $i - 1 ] eq $names[$i] ) { + say "Duplicate station name: $names[$i]"; + $have_duplicates = 1; + } +} +for my $i ( 1 .. $#ds100 ) { + if ( $ds100[ $i - 1 ] eq $ds100[$i] ) { + say "Duplicate DS100 code: $ds100[$i]"; + $have_duplicates = 1; + } +} +for my $i ( 1 .. $#uic_ids ) { + if ( $uic_ids[ $i - 1 ] == $uic_ids[$i] ) { + say "Duplicate UIC ID: $uic_ids[$i]"; + $have_duplicates = 1; + } +} + +if ($have_duplicates) { + say "Thank you for your contribution."; + say "Please remove duplicate entries before opening a pull request."; +} + my $json_out = JSON->new->utf8->canonical->pretty->encode($stations); write_file( 'stations.json', $json_out ); -- cgit v1.2.3