summaryrefslogtreecommitdiff
path: root/lib/FLAT/Regex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/FLAT/Regex')
-rw-r--r--lib/FLAT/Regex/Op.pm282
-rw-r--r--lib/FLAT/Regex/Parser.pm82
-rw-r--r--lib/FLAT/Regex/Transform.pm18
-rw-r--r--lib/FLAT/Regex/Util.pm33
-rw-r--r--lib/FLAT/Regex/WithExtraOps.pm109
5 files changed, 524 insertions, 0 deletions
diff --git a/lib/FLAT/Regex/Op.pm b/lib/FLAT/Regex/Op.pm
new file mode 100644
index 0000000..76e796c
--- /dev/null
+++ b/lib/FLAT/Regex/Op.pm
@@ -0,0 +1,282 @@
+package FLAT::Regex::Op;
+use strict;
+
+sub new {
+ my $pkg = shift;
+ ## flatten alike operations, i.e, "a+(b+c)" into "a+b+c"
+ my @flat = map { UNIVERSAL::isa($_, $pkg) ? $_->members : $_ } @_;
+
+ bless \@flat, $pkg;
+}
+
+sub members {
+ my $self = shift;
+ wantarray ? @$self[0 .. $#$self] : $self->[0];
+}
+
+
+#################################
+#### regex operators / components
+
+package FLAT::Regex::Op::atomic;
+use base 'FLAT::Regex::Op';
+
+sub as_string {
+ my $t = $_[0]->members;
+
+ return "#" if not defined $t;
+ return $t =~ /^\w$/
+ ? $t
+ : "[$t]";
+}
+
+sub as_perl_regex {
+ my $r = $_[0]->members;
+
+ return "(?!)" if not defined $r;
+
+ $r = quotemeta $r;
+ return $r =~ /^\w$/ ? $r : "(?:$r)";
+}
+
+sub as_nfa {
+ FLAT::NFA->singleton( $_[0]->members );
+}
+
+sub as_pfa {
+ FLAT::PFA->singleton( $_[0]->members );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ my $i = $item[1];
+
+ return $pkg->new("") if $i eq "[]";
+ return $pkg->new(undef) if $i eq "#";
+
+ $i =~ s/^\[|\]$//g;
+
+ return $pkg->new($i);
+}
+
+sub reverse {
+ $_[0];
+}
+
+sub is_empty {
+ not defined $_[0]->members;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ defined $self->members and length $self->members;
+}
+
+sub is_finite {
+ 1
+}
+
+##############################
+package FLAT::Regex::Op::star;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s '*'" }
+sub precedence { 30 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = $self->members->as_string($self->precedence) . "*";
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = $self->members->as_perl_regex($self->precedence) . "*";
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ $self->members->as_nfa->kleene;
+}
+
+sub as_pfa {
+ my $self = shift;
+ $self->members->as_pfa->kleene;
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( $item[1] );
+}
+
+sub reverse {
+ my $self = shift;
+ my $op = $self->members->reverse;
+ __PACKAGE__->new($op);
+}
+
+sub is_empty {
+ 0
+}
+
+sub has_nonempty_string {
+ $_[0]->members->has_nonempty_string;
+}
+
+sub is_finite {
+ ! $_[0]->members->has_nonempty_string;
+}
+
+
+################################
+package FLAT::Regex::Op::concat;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s(2..)"; }
+sub precedence { 20 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = join "",
+ map { $_->as_perl_regex($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ my @parts = map { $_->as_nfa } $self->members;
+ $parts[0]->concat( @parts[1..$#parts] );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->concat( @parts[1..$#parts] );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+## note: "reverse" conflicts with perl builtin
+sub reverse {
+ my $self = shift;
+ my @ops = CORE::reverse map { $_->reverse } $self->members;
+ __PACKAGE__->new(@ops);
+}
+
+sub is_empty {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->is_empty;
+ }
+ return 0;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ return 0 if $self->is_empty;
+
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->has_nonempty_string;
+ }
+ return 0;
+}
+
+sub is_finite {
+ my $self = shift;
+ return 1 if $self->is_empty;
+
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_finite;
+ }
+ return 1;
+}
+
+#############################
+package FLAT::Regex::Op::alt;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s(2.. /[+|]/)" }
+sub precedence { 10 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "+",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = join "|",
+ map { $_->as_perl_regex($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ my @parts = map { $_->as_nfa } $self->members;
+ $parts[0]->union( @parts[1..$#parts] );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->union( @parts[1..$#parts] );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+sub reverse {
+ my $self = shift;
+ my @ops = map { $_->reverse } $self->members;
+ __PACKAGE__->new(@ops);
+}
+
+sub is_empty {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_empty;
+ }
+ return 1;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->has_nonempty_string;
+ }
+ return 0;
+}
+
+sub is_finite {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_finite;
+ }
+ return 1;
+}
+1;
diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm
new file mode 100644
index 0000000..deb73f1
--- /dev/null
+++ b/lib/FLAT/Regex/Parser.pm
@@ -0,0 +1,82 @@
+package FLAT::Regex::Parser;
+use strict;
+
+#### Is this one level of abstraction too far? Parser generator generators..
+
+#### TODO: try YAPP, since recursive descent is SLOOOW
+use Parse::RecDescent;
+use FLAT::Regex::Op;
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x;
+
+sub new {
+ my $pkg = shift;
+ my @ops = sort { $a->{prec} <=> $b->{prec} }
+ map {{
+ pkg => "FLAT::Regex::Op::$_",
+ prec => "FLAT::Regex::Op::$_"->precedence,
+ spec => "FLAT::Regex::Op::$_"->parse_spec,
+ short => $_
+ }} @_;
+
+ my $lowest = shift @ops;
+ my $grammar = qq!
+ parse:
+ $lowest->{short} /^\\Z/ { \$item[1] }
+ !;
+
+ my $prev = $lowest;
+ for (@ops) {
+ my $spec = sprintf $prev->{spec}, $_->{short};
+
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | $_->{short} { \$item[1] }
+ !;
+
+ $prev = $_;
+ }
+
+ my $spec = sprintf $prev->{spec}, "atomic";
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | atomic { \$item[1] }
+
+ atomic:
+ "(" $lowest->{short} ")" { \$item[2] }
+ | /\$FLAT::Regex::Parser::CHAR/
+ { FLAT::Regex::Op::atomic->from_parse(\@item) }
+ !;
+
+ Parse::RecDescent->new($grammar);
+}
+
+1;
+
+
+__END__
+
+original parser:
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x;
+
+my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die;
+ parse:
+ alt /^\Z/ { $item[1] }
+ alt:
+ concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) }
+ | concat { $item[1] }
+ concat:
+ star(2..) { FLAT::Regex::Op::concat->from_parse(@item) }
+ | star { $item[1] }
+ star :
+ atomic '*' { FLAT::Regex::Op::star->from_parse(@item) }
+ | atomic { $item[1] }
+ atomic:
+ "(" alt ")" { $item[2] }
+ | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) }
+__EOG__
diff --git a/lib/FLAT/Regex/Transform.pm b/lib/FLAT/Regex/Transform.pm
new file mode 100644
index 0000000..cd0cf56
--- /dev/null
+++ b/lib/FLAT/Regex/Transform.pm
@@ -0,0 +1,18 @@
+package FLAT::Regex::Transform;
+
+# Extends FLAT::Regex::WithExtraOps with PRegex transformations
+# (i.e., reductions based on: w*v & a*b
+
+use base 'FLAT::Regex::WithExtraOps';
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new(@_);
+ return $self;
+}
+
+# Ideally, the transformation should be implemented as an iterator. This
+# approach will be finite for shuffles with NO closed strings, but will carry on
+# indefinitely for the shuffle of strings where at least one of the strings is closed
+
+1;
diff --git a/lib/FLAT/Regex/Util.pm b/lib/FLAT/Regex/Util.pm
new file mode 100644
index 0000000..516ad9f
--- /dev/null
+++ b/lib/FLAT/Regex/Util.pm
@@ -0,0 +1,33 @@
+package FLAT::Regex::Util;
+use base 'FLAT::Regex';
+
+use strict;
+use Carp;
+
+sub get_symbol {
+ my @symbols = qw/0 1/;
+ return $symbols[rand(2)];
+}
+
+sub get_op {
+ my @ops = ('*','+','&','','','','','','','');
+ return $ops[rand(10)];
+}
+
+sub get_random {
+ my $length = shift;
+ my $string = '';
+ if (1 < $length) {
+ $string = get_symbol().get_op().get_random(--$length);
+ } else {
+ $string = get_symbol();
+ }
+ return $string;
+}
+
+sub random_pre {
+ my $length = ( $_[0] ? $_[0] : 32 );
+ return FLAT::Regex::WithExtraOps->new(get_random($length));
+}
+
+1;
diff --git a/lib/FLAT/Regex/WithExtraOps.pm b/lib/FLAT/Regex/WithExtraOps.pm
new file mode 100644
index 0000000..b366d7c
--- /dev/null
+++ b/lib/FLAT/Regex/WithExtraOps.pm
@@ -0,0 +1,109 @@
+package FLAT::Regex::WithExtraOps;
+use base 'FLAT::Regex';
+
+use strict;
+use Carp;
+
+my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star negate shuffle ]);
+sub _parser { $PARSER }
+
+sub members {
+ my $self = shift;
+ wantarray ? @$self[0 .. $#$self] : $self->[0];
+}
+
+#### Precedence
+# 30 ::star
+# 20 ::concat
+# 15 ::negate <---<< WithExtraOps
+# 12 ::shuffle <---<< WithExtraOps
+# 10 ::alt
+# 0 ::atomic
+
+###############################
+package FLAT::Regex::Op::negate;
+use base "FLAT::Regex::Op";
+use Carp;
+
+sub parse_spec { "'~' %s"; }
+sub precedence { 15 } # between concat and alternation
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = "~" . $self->members->as_string($self->precedence);
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( $item[2] );
+}
+
+## note: "reverse" conflicts with perl builtin
+sub reverse {
+ my $self = shift;
+ my $op = $self->members->reverse;
+ __PACKAGE__->new($op);
+}
+
+sub is_empty {
+ croak "Not implemented for negated regexes";
+}
+
+sub has_nonempty_string {
+ croak "Not implemented for negated regexes";
+}
+
+sub is_finite {
+ croak "Not implemented for negated regexes";
+}
+
+###############################
+package FLAT::Regex::Op::shuffle;
+use base 'FLAT::Regex::Op';
+use Carp;
+
+sub parse_spec { "%s(2.. /[&]/)" }
+sub precedence { 12 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "&",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my $self = shift;
+ croak "Not implemented for shuffled regexes";
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->shuffle( @parts[1..$#parts] );
+}
+
+# Implement?
+sub reverse {
+ my $self = shift;
+ croak "Not implemented for shuffled regexes";
+}
+
+sub is_empty {
+ croak "Not implemented for shuffled regexes";
+}
+
+sub has_nonempty_string {
+ croak "Not implemented for shuffled regexes";
+}
+
+sub is_finite {
+ croak "Not implemented for shuffled regexes";
+}