#!/usr/bin/env perl # Copyright (C) 2020 Daniel Friesel # # SPDX-License-Identifier: BSD-2-Clause use strict; use warnings; use 5.020; use File::Slurp qw(write_file); use IPC::Run3; use JSON; sub show_usage { my ($exit_code) = @_; say STDERR "Usage: db-wagenreihung-to-json \n"; say STDERR "You can obtain the latest PDF from "; exit $exit_code; } if ( @ARGV == 0 ) { show_usage(1); } run3( [ "which", "pdftotext" ], \undef, \undef, \undef ); if ($?) { say STDERR "Error: db-wagenreihung-to-json depends on the pdftotext utility, but could not find it on this system.\n"; say STDERR "On Debian and derivatives, it should be available in the 'poppler-utils' package."; exit 2; } my $wr_pdf = $ARGV[0]; my $wr_txt; my $command = [ "pdftotext", "-layout", "-nopgbrk", $wr_pdf, "/dev/stdout" ]; # stdin: /dev/null # stdout: $wr_txt # stderr: forwarded to terminal run3( $command, \undef, \$wr_txt, undef, { binmode_stdout => ':encoding(utf-8)' } ); if ($?) { my $exit_status = $? >> 8; say STDERR "\nError: 'pdftotext $wr_pdf' failed (exit status: $exit_status)"; exit 2; } my @lines = split( qr{\n}, $wr_txt ); my ( $type, $number ); my %map; my %wagon_map; my $state = ""; for my $line (@lines) { if ( $line =~ m{ ^ (? [EINR][CJN][ A-Z-]{6}) (? [ 0-9]{2,12}) }x ) { $type = $+{type}; $type =~ tr{ }{}d; $number = $+{number}; $number =~ tr{ }{}d; if ( $number !~ m{ ^ \d+ $ }x ) { next; } $state = "train_no"; #say "$type $number"; $map{$number} = { raw => $type, }; if ( $type eq 'ICE-A' ) { $map{$number}{type} = 'ICE 1/2/4'; } elsif ( $type eq 'ICE-W' ) { $map{$number}{type} = 'ICE 3'; $map{$number}{short} = '3'; } elsif ( $type eq 'ICE' ) { $map{$number}{type} = 'ICE 1/2/4'; } elsif ( $type eq 'ICE-T' ) { $map{$number}{type} = 'ICE T'; $map{$number}{short} = 'T'; } elsif ( $type eq 'IC-A' ) { $map{$number}{type} = 'Metropolitan'; $map{$number}{short} = 'M'; } elsif ( $type eq 'IC-D' ) { $map{$number}{type} = 'IC2'; $map{$number}{short} = '2'; } elsif ( $type eq 'IC-E' ) { $map{$number}{type} = 'IC2 KISS'; $map{$number}{short} = '2'; } else { $map{$number}{type} = $type; } } elsif ( $line =~ m{^LPF} ) { # Not in passenger service ("Leehrfahrt"). Ignore it for now. # FWIW, LPF(T) can be further distinguished: # LPFT-A: ICE BR 401/402 # LPFT-B: ICE BR 412 # LPFT-T: ICE BR 411 / 415 / 605 # LPFT-W: ICE BR 403 / 406 $number = undef; $state = ""; } elsif ( $state eq "train_no" and $line =~ m{ ^ \s{5} \S }x ) { $state = "route"; $map{$number}{route} = ""; } # ICE types can be further distinguished by rolling stock. For instance, # ICE 4 is the only ICE-A variant with restaurant and first-class seats # in the same wagon ("ARmz"). elsif ( $number and $line =~ m{^\d*\s+ARmz} ) { if ( $map{$number}{type} eq 'ICE 1/2/4' ) { $map{$number}{type} = 'ICE 4'; $map{$number}{short} = '4'; } } elsif ( $number and $line =~ m{^\d*\s+WRmz} ) { #say "ICE 1 / ICE 3 Redesign (WRmz)"; if ( $map{$number}{type} eq 'ICE 1/2/4' ) { $map{$number}{type} = 'ICE 1'; $map{$number}{short} = '1'; } elsif ( $map{$number}{type} eq 'ICE 3' ) { $map{$number}{type} = 'ICE 3 Redesign'; $map{$number}{short} = '3R'; } } elsif ( $number and $line =~ m{^\d*\s+WRmbsz} ) { #say "ICE 2 (WRmbsz)"; if ( $map{$number}{type} eq 'ICE 1/2/4' ) { $map{$number}{type} = 'ICE 2'; $map{$number}{short} = '2'; } } elsif ( $number and $line =~ m{^\d*\s+Bpmbsz} ) { #say "ICE 3 Velaro"; if ( $map{$number}{type} eq 'ICE 3' ) { $map{$number}{type} = 'ICE 3 Velaro'; $map{$number}{short} = '3V'; } } if ( $state eq "route" and $line =~ m{ ^ \s{5} (? .* ) $ }x ) { if ( length( $map{$number}{route} ) ) { $map{$number}{route} .= ' '; } $map{$number}{route} .= $+{route}; } elsif ( $state eq "route" and $line eq "" ) { $state = "conditions"; } elsif ( $state eq "conditions" and $line =~ m{ Hg | Tfz | BrH }x ) { $state = "details"; } if ( $number and $line =~ m{ ^ \d* \s{1,10} (? D? (WL)? (A|AB|B|W) R? D? [aceimpvw] \S* ) \s* (? \d+ )? (?: $ | \s{6} ) }x ) { $state = "wagonorder"; push( @{ $wagon_map{$number} }, [ $+{type}, $+{number} ] ); #$map{$number}{has_wagon}{$1} = \1; $map{$number}{wagons}{$1} = \1; } if ( $state eq "conditions" and length($line) and $line =~ m{ \S }x ) { push( @{ $map{$number}{conditions} }, $line ); } if ( $state eq "details" and length($line) and $line =~ m{ Hg | Tfz | BrH }x ) { push( @{ $map{$number}{details} }, $line ); } } for my $train_number ( keys %map ) { my $wagon_numbers_ok = 1; my $wagon_types_ok = 1; my %bincount; my %type_by_wagon_number; for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { my ( $wagon_type, $wagon_number ) = @{$wagon}; if ( not $wagon_number ) { $wagon_numbers_ok = 0; } else { $bincount{$wagon_number}++; push( @{ $type_by_wagon_number{$wagon_number} }, $wagon_type ); } } if ($wagon_numbers_ok) { for my $wagon ( @{ $wagon_map{$train_number} // [] } ) { my ( $wagon_type, $wagon_number ) = @{$wagon}; $map{$train_number}{wagon}{ $wagon->[1] } = $wagon_type; if ( $bincount{$wagon_number} > 1 ) { if ( $type_by_wagon_number{$wagon_number}[0] ne $type_by_wagon_number{$wagon_number}[1] ) { $wagon_types_ok = 0; } } } } if ( not $wagon_types_ok ) { # train type may differ depending on date of week or similar. We don't # parse/handle that yet. delete $map{$train_number}{type}; delete $map{$train_number}{short}; delete $map{$train_number}{wagon}; delete $map{$train_number}{has_wagon}; } } # use canonical output (i.e., sort hash keys) to allow for easy diffing. say JSON->new->utf8->canonical->encode( {%map} );