From e6b65a276baffc6910568f095a54069c20ee8014 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Thu, 10 Dec 2020 19:33:35 +0100 Subject: initial commit --- bin/db-wagenreihung-to-json | 162 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100755 bin/db-wagenreihung-to-json (limited to 'bin') diff --git a/bin/db-wagenreihung-to-json b/bin/db-wagenreihung-to-json new file mode 100755 index 0000000..ef71aa0 --- /dev/null +++ b/bin/db-wagenreihung-to-json @@ -0,0 +1,162 @@ +#!/usr/bin/env perl +# Copyright (C) 2020 Daniel Friesel +# +# SPDX-License-Identifier: BSD-2-Clause + +use strict; +use warnings; +use 5.020; + +use File::Slurp qw(write_file); +use IPC::Run3; +use JSON; + +sub show_usage { + my ($exit_code) = @_; + + say STDERR "Usage: db-wagenreihung-to-json \n"; + say STDERR +"You can obtain the latest PDF from "; + + exit $exit_code; +} + +if ( @ARGV == 0 ) { + show_usage(1); +} + +run3( [ "which", "pdftotext" ], \undef, \undef, \undef ); +if ($?) { + say STDERR +"Error: db-wagenreihung-to-json depends on the pdftotext utility, but could not find it on this system.\n"; + say STDERR +"On Debian and derivatives, it should be available in the 'poppler-utils' package."; + exit 2; +} + +my $wr_pdf = $ARGV[0]; +my $wr_txt; +my $command = [ "pdftotext", "-layout", $wr_pdf, "/dev/stdout" ]; + +# stdin: /dev/null +# stdout: $wr_txt +# stderr: forwarded to terminal +run3( $command, \undef, \$wr_txt, undef ); + +if ($?) { + my $exit_status = $? >> 8; + say STDERR + "\nError: 'pdftotext $wr_pdf' failed (exit status: $exit_status)"; + exit 2; +} + +my @lines = split( qr{\n}, $wr_txt ); + +my ( $type, $number ); +my %map; + +for my $line (@lines) { + + if ( $line + =~ m{ ^ (? [EINR][CJN][ A-Z-]{6}) (? [ 0-9]{2,12}) }x ) + { + $type = $+{type}; + $type =~ tr{ }{}d; + $number = $+{number}; + $number =~ tr{ }{}d; + + if ( $number !~ m{ ^ \d+ $ }x ) { + next; + } + + #say "$type $number"; + $map{$number} = { + raw => $type, + }; + + if ( $type eq 'ICE-A' ) { + $map{$number}{type} = 'ICE 1/2/4'; + } + elsif ( $type eq 'ICE-W' ) { + $map{$number}{type} = 'ICE 3'; + $map{$number}{short} = '3'; + } + elsif ( $type eq 'ICE' ) { + $map{$number}{type} = 'ICE 1/2/4'; + } + elsif ( $type eq 'ICE-T' ) { + $map{$number}{type} = 'ICE T'; + $map{$number}{short} = 'T'; + } + elsif ( $type eq 'IC-A' ) { + $map{$number}{type} = 'Metropolitan'; + $map{$number}{short} = 'M'; + } + elsif ( $type eq 'IC-D' ) { + $map{$number}{type} = 'IC2'; + $map{$number}{short} = '2'; + } + elsif ( $type eq 'IC-E' ) { + $map{$number}{type} = 'IC2 KISS'; + $map{$number}{short} = '2'; + } + else { + $map{$number}{type} = $type; + } + } + elsif ( $line =~ m{^LPF} ) { + + # Not in passenger service ("Leehrfahrt"). Ignore it for now. + # FWIW, LPF(T) can be further distinguished: + # LPFT-A: ICE BR 401/402 + # LPFT-B: ICE BR 412 + # LPFT-T: ICE BR 411 / 415 / 605 + # LPFT-W: ICE BR 403 / 406 + $number = undef; + } + + # ICE types can be further distinguished by rolling stock. For instance, + # ICE 4 is the only ICE-A variant with restaurant and first-class seats + # in the same wagon ("ARmz"). + elsif ( $number and $line =~ m{^\d*\s+ARmz} ) { + if ( $map{$number}{type} eq 'ICE 1/2/4' ) { + $map{$number}{type} = 'ICE 4'; + $map{$number}{short} = '4'; + } + } + elsif ( $number and $line =~ m{^\d*\s+WRmz} ) { + + #say "ICE 1 / ICE 3 Redesign (WRmz)"; + if ( $map{$number}{type} eq 'ICE 1/2/4' ) { + $map{$number}{type} = 'ICE 1'; + $map{$number}{short} = '1'; + } + elsif ( $map{$number}{type} eq 'ICE 3' ) { + $map{$number}{type} = 'ICE 3 Redesign'; + $map{$number}{short} = '3R'; + } + } + elsif ( $number and $line =~ m{^\d*\s+WRmbsz} ) { + + #say "ICE 2 (WRmbsz)"; + if ( $map{$number}{type} eq 'ICE 1/2/4' ) { + $map{$number}{type} = 'ICE 2'; + $map{$number}{short} = '2'; + } + } + elsif ( $number and $line =~ m{^\d*\s+Bpmbsz} ) { + + #say "ICE 3 Velaro"; + if ( $map{$number}{type} eq 'ICE 3' ) { + $map{$number}{type} = 'ICE 3 Velaro'; + $map{$number}{short} = '3V'; + } + } + + if ( $number and $line =~ m{^\d*\s+(D?(A|AB|B|W)R?D?[impvw]\S*)} ) { + $map{$number}{wagons}{$1} = \1; + } +} + +# use canonical output (i.e., sort hash keys) to allow for easy diffing. +say JSON->new->utf8->canonical->encode( {%map} ); -- cgit v1.2.3