summaryrefslogtreecommitdiff
path: root/lib/FLAT/Regex/Parser.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/FLAT/Regex/Parser.pm')
-rw-r--r--lib/FLAT/Regex/Parser.pm82
1 files changed, 82 insertions, 0 deletions
diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm
new file mode 100644
index 0000000..deb73f1
--- /dev/null
+++ b/lib/FLAT/Regex/Parser.pm
@@ -0,0 +1,82 @@
+package FLAT::Regex::Parser;
+use strict;
+
+#### Is this one level of abstraction too far? Parser generator generators..
+
+#### TODO: try YAPP, since recursive descent is SLOOOW
+use Parse::RecDescent;
+use FLAT::Regex::Op;
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x;
+
+sub new {
+ my $pkg = shift;
+ my @ops = sort { $a->{prec} <=> $b->{prec} }
+ map {{
+ pkg => "FLAT::Regex::Op::$_",
+ prec => "FLAT::Regex::Op::$_"->precedence,
+ spec => "FLAT::Regex::Op::$_"->parse_spec,
+ short => $_
+ }} @_;
+
+ my $lowest = shift @ops;
+ my $grammar = qq!
+ parse:
+ $lowest->{short} /^\\Z/ { \$item[1] }
+ !;
+
+ my $prev = $lowest;
+ for (@ops) {
+ my $spec = sprintf $prev->{spec}, $_->{short};
+
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | $_->{short} { \$item[1] }
+ !;
+
+ $prev = $_;
+ }
+
+ my $spec = sprintf $prev->{spec}, "atomic";
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | atomic { \$item[1] }
+
+ atomic:
+ "(" $lowest->{short} ")" { \$item[2] }
+ | /\$FLAT::Regex::Parser::CHAR/
+ { FLAT::Regex::Op::atomic->from_parse(\@item) }
+ !;
+
+ Parse::RecDescent->new($grammar);
+}
+
+1;
+
+
+__END__
+
+original parser:
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x;
+
+my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die;
+ parse:
+ alt /^\Z/ { $item[1] }
+ alt:
+ concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) }
+ | concat { $item[1] }
+ concat:
+ star(2..) { FLAT::Regex::Op::concat->from_parse(@item) }
+ | star { $item[1] }
+ star :
+ atomic '*' { FLAT::Regex::Op::star->from_parse(@item) }
+ | atomic { $item[1] }
+ atomic:
+ "(" alt ")" { $item[2] }
+ | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) }
+__EOG__