From 87a5ea0b1aba22a607d91a6d7647520e373a87d2 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Sun, 20 Sep 2009 00:07:49 +0200 Subject: Improved readability of the regular expressions --- bin/efa | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/bin/efa b/bin/efa index 93c1e7a..38864f9 100755 --- a/bin/efa +++ b/bin/efa @@ -35,9 +35,25 @@ my $ignore_info = 'Fahrradmitnahme'; sub check_ambiguous { my $html = shift; - my $choose_re = qr#Bitte auswählen#; - my $select_re = qr# + (? [^<]+ ) + + }x; if ($html =~ /$choose_re/s) { foreach (split(/$choose_re/s, $html)) { @@ -57,25 +73,44 @@ sub parse_content { my $raw = shift; my $groupsize = 8; my $return; + my $time_re = qr{ \d+ : \d+ }x; + my $ext_time_re = qr{ + ^ ( + $time_re + | + ab \s + | + ) $ + }x; + my $anschluss_re = qr{ + ^ ( + Fußweg + | + Anschluss \s wird .* abgewartet + ) + }x; + for (my $offer = 0; exists($raw->[$offer]); $offer++) { foreach (@{$raw->[$offer]}) { s/\s*
\s*/, /gx; s/< [^>]+ >//gx; } + for (my $i = 0; @{$raw->[$offer]} >= (($i+1) * $groupsize) - 1; $i++) { my $offset = $i * $groupsize; my @extra; if ( - $raw->[$offer]->[$offset+2] =~ /^(Fußweg | Anschluss \s wird .* abgewartet)/x - or $raw->[$offer]->[$offset+3] =~ /^Fußweg/ + $raw->[$offer]->[$offset+2] =~ $anschluss_re + or $raw->[$offer]->[$offset+3] =~ / ^ Fußweg /x ) { # These are generic and usually lack both the time and the last element - if ($raw->[$offer]->[$offset ] !~ /\d+:\d+/) {splice(@{$raw->[$offer]}, $offset , 0, '')} - if ($raw->[$offer]->[$offset+4] !~ /\d+:\d+/) {splice(@{$raw->[$offer]}, $offset+4, 0, '')} + if ($raw->[$offer]->[$offset ] !~ $time_re) {splice(@{$raw->[$offer]}, $offset , 0, '')} + if ($raw->[$offer]->[$offset+4] !~ $time_re) {splice(@{$raw->[$offer]}, $offset+4, 0, '')} splice(@{$raw->[$offer]}, $offset+7, 0, ''); } + for my $j (0, 4, 8) { - until (not exists($raw->[$offer]->[$offset+$j]) or $raw->[$offer]->[$offset+$j] =~ /^(\d+ : \d+ | ab \s |)$/x) { + until (not exists($raw->[$offer]->[$offset+$j]) or $raw->[$offer]->[$offset+$j] =~ $ext_time_re) { last unless (exists($raw->[$offer]->[$offset+$j])); last if ($raw->[$offer]->[$offset+$j] eq 'Verspätungen sind berücksichtigt'); if ($raw->[$offer]->[$offset+$j] =~ /^ \s* $/x) { @@ -85,6 +120,7 @@ sub parse_content { } } } + $return->[$offer]->[$i] = { deptime => $raw->[$offer]->[$offset], dep => $raw->[$offer]->[$offset+1], @@ -105,13 +141,23 @@ sub prepare_content { my $html = shift; my $offer = 0; my $return; - foreach (split(/ \d+\. Fahrt<\/span>/, $html)) { + my $split_re = qr{ + + \s \d+ \. \s Fahrt + + }x; + my $content_re = qr{ + + (? .+ ) + + }x; + foreach (split($split_re, $html)) { unless ($offer) { $offer++; next; } foreach (split(/\n/)) { - if (/(?.+)<\/span><\/td>/) { + if ($_ =~ $content_re) { push(@{$return->[$offer-1]}, $+{content}); } } -- cgit v1.2.3