summaryrefslogtreecommitdiff
path: root/scripts/acronyms.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/acronyms.pl')
-rwxr-xr-xscripts/acronyms.pl89
1 files changed, 17 insertions, 72 deletions
diff --git a/scripts/acronyms.pl b/scripts/acronyms.pl
index 6ac2d16..9c14164 100755
--- a/scripts/acronyms.pl
+++ b/scripts/acronyms.pl
@@ -3,9 +3,8 @@
use strict;
use warnings;
use 5.010;
-use Encode qw(decode encode);
-use List::Util qw(max sum);
-use List::MoreUtils qw(true);
+use Encode qw(encode);
+use Text::CSV;
say <<'EOF';
package Travel::Status::DE::IRIS::Stations;
@@ -22,86 +21,32 @@ use Text::LevenshteinXS qw(distance);
# TODO switch to Text::Levenshtein::XS once AUR/Debian packages become available
-our $VERSION = '1.02';
+our $VERSION = '1.04';
my @stations = (
EOF
-my @buf;
-
-sub process_block {
- my @histogram;
- my @borders = (0);
- my $run = 0;
-
- my $length = max (map { length($_) } @buf);
-
- for my $i (0 .. $length) {
- $histogram[$i] = true { length($_) < $i or substr($_, $i, 1) eq q{ } } @buf;
-
- if ($histogram[$i] == @buf) {
- if (not $run) {
- push(@borders, $i);
- $run = 1;
- }
- }
- else {
- $run = 0;
- }
- }
- for my $i (0 .. $#borders / 2) {
- for my $line (@buf) {
- my $station_offset = $borders[2 * $i];
- my $name_offset = $borders[2 * $i + 1];
- my $station_length = $name_offset - $station_offset;
- my $name_length = $borders[2 * $i + 2] ? ($borders[2 * $i + 2] - $name_offset) : undef;
-
- if (length($line) < $station_offset) {
- next;
- }
-
- my $station = substr($line, $station_offset, $station_length);
- my $name = $name_length ? substr($line, $name_offset, $name_length) : substr($line, $name_offset);
-
- $station =~ s{^\s+}{};
- $station =~ s{\s+$}{};
- $station =~ s{\s+}{ }g;
- $name =~ s{!}{ }g;
- $name =~ s{^\s+}{};
- $name =~ s{\s+$}{};
- $name =~ s{\s+}{ }g;
- $name =~ s{'}{\\'}g;
-
- if (length($station) == 0) {
- next;
- }
-
- printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name));
- }
- }
-}
-
+my $csv = Text::CSV->new({binary => 1, sep_char => q{;}});
while (my $line = <STDIN>) {
- chomp $line;
- $line = decode('UTF-8', $line);
+# chomp $line;
+# $line = decode('UTF-8', $line);
- if (length($line) == 0 and @buf) {
- process_block();
- @buf = ();
- }
+ my $status = $csv->parse($line);
+ my @fields = $csv->fields;
- if ($line !~ m{ ^ [A-Z]{2} }x and $line !~ m{ \s [A-Z]{2,5} \s }x) {
+ if ($fields[0] eq 'Abk') {
next;
}
- $line =~ s{RB-Gr km}{RB-Gr!km}g;
- $line =~ s{RB-Gr!km\s++}{RB-Gr!km!}g;
- $line =~ s{Bad }{Bad!}g;
+ my ($station, $name, $country, $location, $valid_since) = @fields;
- push(@buf, $line);
-}
-if (@buf) {
- process_block();
+ $name =~ s{!}{ }g;
+ $name =~ s{^\s+}{};
+ $name =~ s{\s+$}{};
+ $name =~ s{\s+}{ }g;
+ $name =~ s{'}{\\'}g;
+
+ printf("\t['%s','%s'],\n", encode('UTF-8', $station), encode('UTF-8', $name));
}
say <<'EOF';