diff options
author | Daniel Friesel <derf@derf.homelinux.org> | 2009-09-20 00:07:49 +0200 |
---|---|---|
committer | Daniel Friesel <derf@derf.homelinux.org> | 2009-09-20 00:07:49 +0200 |
commit | 87a5ea0b1aba22a607d91a6d7647520e373a87d2 (patch) | |
tree | d957f19a8b8cd011b91e5dbd07a91c1c81cd4008 /bin | |
parent | 9d02c50f3fd49daede6e67b6d9e77d47439aab9d (diff) |
Improved readability of the regular expressions
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/efa | 66 |
1 files changed, 56 insertions, 10 deletions
@@ -35,9 +35,25 @@ my $ignore_info = 'Fahrradmitnahme'; sub check_ambiguous { my $html = shift; - my $choose_re = qr#<span class="errorTextBold">Bitte auswählen</span>#; - my $select_re = qr#<select name="(?<what>(place|type|name)_(origin|destination))"#; - my $option_re = qr#<option value="\d+(:\d+)*"( selected)?>(?<choice>[^<]+)</option>#; + my $choose_re = qr{ + <span \s class="errorTextBold"> + Bitte \s auswählen + </span> + }x; + my $select_re = qr{ + <select \s name=" + (?<what> + ( place | type | name ) + _ + ( origin | destination ) + ) " + }x; + my $option_re = qr{ + <option \s value=" \d+ ( : \d+ )* " + ( \s selected )? > + (?<choice> [^<]+ ) + </option> + }x; if ($html =~ /$choose_re/s) { foreach (split(/$choose_re/s, $html)) { @@ -57,25 +73,44 @@ sub parse_content { my $raw = shift; my $groupsize = 8; my $return; + my $time_re = qr{ \d+ : \d+ }x; + my $ext_time_re = qr{ + ^ ( + $time_re + | + ab \s + | + ) $ + }x; + my $anschluss_re = qr{ + ^ ( + Fußweg + | + Anschluss \s wird .* abgewartet + ) + }x; + for (my $offer = 0; exists($raw->[$offer]); $offer++) { foreach (@{$raw->[$offer]}) { s/\s* <br> \s*/, /gx; s/< [^>]+ >//gx; } + for (my $i = 0; @{$raw->[$offer]} >= (($i+1) * $groupsize) - 1; $i++) { my $offset = $i * $groupsize; my @extra; if ( - $raw->[$offer]->[$offset+2] =~ /^(Fußweg | Anschluss \s wird .* abgewartet)/x - or $raw->[$offer]->[$offset+3] =~ /^Fußweg/ + $raw->[$offer]->[$offset+2] =~ $anschluss_re + or $raw->[$offer]->[$offset+3] =~ / ^ Fußweg /x ) { # These are generic and usually lack both the time and the last element - if ($raw->[$offer]->[$offset ] !~ /\d+:\d+/) {splice(@{$raw->[$offer]}, $offset , 0, '')} - if ($raw->[$offer]->[$offset+4] !~ /\d+:\d+/) {splice(@{$raw->[$offer]}, $offset+4, 0, '')} + if ($raw->[$offer]->[$offset ] !~ $time_re) {splice(@{$raw->[$offer]}, $offset , 0, '')} + if ($raw->[$offer]->[$offset+4] !~ $time_re) {splice(@{$raw->[$offer]}, $offset+4, 0, '')} splice(@{$raw->[$offer]}, $offset+7, 0, ''); } + for my $j (0, 4, 8) { - until (not exists($raw->[$offer]->[$offset+$j]) or $raw->[$offer]->[$offset+$j] =~ /^(\d+ : \d+ | ab \s |)$/x) { + until (not exists($raw->[$offer]->[$offset+$j]) or $raw->[$offer]->[$offset+$j] =~ $ext_time_re) { last unless (exists($raw->[$offer]->[$offset+$j])); last if ($raw->[$offer]->[$offset+$j] eq 'Verspätungen sind berücksichtigt'); if ($raw->[$offer]->[$offset+$j] =~ /^ \s* $/x) { @@ -85,6 +120,7 @@ sub parse_content { } } } + $return->[$offer]->[$i] = { deptime => $raw->[$offer]->[$offset], dep => $raw->[$offer]->[$offset+1], @@ -105,13 +141,23 @@ sub prepare_content { my $html = shift; my $offer = 0; my $return; - foreach (split(/<span class="labelTextBold"> \d+\. Fahrt<\/span>/, $html)) { + my $split_re = qr{ + <span \s class="labelTextBold"> + \s \d+ \. \s Fahrt + </span> + }x; + my $content_re = qr{ + <span \s class="labelText" ( \s valign="center" )? > + (?<content> .+ ) + </span> </td> + }x; + foreach (split($split_re, $html)) { unless ($offer) { $offer++; next; } foreach (split(/\n/)) { - if (/<span class="labelText"( valign="center")?>(?<content>.+)<\/span><\/td>/) { + if ($_ =~ $content_re) { push(@{$return->[$offer-1]}, $+{content}); } } |