diff options
Diffstat (limited to 'lib/FLAT/Regex')
-rw-r--r-- | lib/FLAT/Regex/Op.pm | 282 | ||||
-rw-r--r-- | lib/FLAT/Regex/Parser.pm | 82 | ||||
-rw-r--r-- | lib/FLAT/Regex/Transform.pm | 18 | ||||
-rw-r--r-- | lib/FLAT/Regex/Util.pm | 33 | ||||
-rw-r--r-- | lib/FLAT/Regex/WithExtraOps.pm | 109 |
5 files changed, 524 insertions, 0 deletions
diff --git a/lib/FLAT/Regex/Op.pm b/lib/FLAT/Regex/Op.pm new file mode 100644 index 0000000..76e796c --- /dev/null +++ b/lib/FLAT/Regex/Op.pm @@ -0,0 +1,282 @@ +package FLAT::Regex::Op; +use strict; + +sub new { + my $pkg = shift; + ## flatten alike operations, i.e, "a+(b+c)" into "a+b+c" + my @flat = map { UNIVERSAL::isa($_, $pkg) ? $_->members : $_ } @_; + + bless \@flat, $pkg; +} + +sub members { + my $self = shift; + wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + + +################################# +#### regex operators / components + +package FLAT::Regex::Op::atomic; +use base 'FLAT::Regex::Op'; + +sub as_string { + my $t = $_[0]->members; + + return "#" if not defined $t; + return $t =~ /^\w$/ + ? $t + : "[$t]"; +} + +sub as_perl_regex { + my $r = $_[0]->members; + + return "(?!)" if not defined $r; + + $r = quotemeta $r; + return $r =~ /^\w$/ ? $r : "(?:$r)"; +} + +sub as_nfa { + FLAT::NFA->singleton( $_[0]->members ); +} + +sub as_pfa { + FLAT::PFA->singleton( $_[0]->members ); +} + +sub from_parse { + my ($pkg, @item) = @_; + my $i = $item[1]; + + return $pkg->new("") if $i eq "[]"; + return $pkg->new(undef) if $i eq "#"; + + $i =~ s/^\[|\]$//g; + + return $pkg->new($i); +} + +sub reverse { + $_[0]; +} + +sub is_empty { + not defined $_[0]->members; +} + +sub has_nonempty_string { + my $self = shift; + defined $self->members and length $self->members; +} + +sub is_finite { + 1 +} + +############################## +package FLAT::Regex::Op::star; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s '*'" } +sub precedence { 30 } + +sub as_string { + my ($self, $prec) = @_; + my $result = $self->members->as_string($self->precedence) . "*"; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = $self->members->as_perl_regex($self->precedence) . "*"; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + $self->members->as_nfa->kleene; +} + +sub as_pfa { + my $self = shift; + $self->members->as_pfa->kleene; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( $item[1] ); +} + +sub reverse { + my $self = shift; + my $op = $self->members->reverse; + __PACKAGE__->new($op); +} + +sub is_empty { + 0 +} + +sub has_nonempty_string { + $_[0]->members->has_nonempty_string; +} + +sub is_finite { + ! $_[0]->members->has_nonempty_string; +} + + +################################ +package FLAT::Regex::Op::concat; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2..)"; } +sub precedence { 20 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = join "", + map { $_->as_perl_regex($self->precedence) } + $self->members; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + my @parts = map { $_->as_nfa } $self->members; + $parts[0]->concat( @parts[1..$#parts] ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->concat( @parts[1..$#parts] ); +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { + my $self = shift; + my @ops = CORE::reverse map { $_->reverse } $self->members; + __PACKAGE__->new(@ops); +} + +sub is_empty { + my $self = shift; + my @members = $self->members; + for (@members) { + return 1 if $_->is_empty; + } + return 0; +} + +sub has_nonempty_string { + my $self = shift; + return 0 if $self->is_empty; + + my @members = $self->members; + for (@members) { + return 1 if $_->has_nonempty_string; + } + return 0; +} + +sub is_finite { + my $self = shift; + return 1 if $self->is_empty; + + my @members = $self->members; + for (@members) { + return 0 if not $_->is_finite; + } + return 1; +} + +############################# +package FLAT::Regex::Op::alt; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2.. /[+|]/)" } +sub precedence { 10 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "+", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = join "|", + map { $_->as_perl_regex($self->precedence) } + $self->members; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + my @parts = map { $_->as_nfa } $self->members; + $parts[0]->union( @parts[1..$#parts] ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->union( @parts[1..$#parts] ); +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +sub reverse { + my $self = shift; + my @ops = map { $_->reverse } $self->members; + __PACKAGE__->new(@ops); +} + +sub is_empty { + my $self = shift; + my @members = $self->members; + for (@members) { + return 0 if not $_->is_empty; + } + return 1; +} + +sub has_nonempty_string { + my $self = shift; + my @members = $self->members; + for (@members) { + return 1 if $_->has_nonempty_string; + } + return 0; +} + +sub is_finite { + my $self = shift; + my @members = $self->members; + for (@members) { + return 0 if not $_->is_finite; + } + return 1; +} +1; diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm new file mode 100644 index 0000000..deb73f1 --- /dev/null +++ b/lib/FLAT/Regex/Parser.pm @@ -0,0 +1,82 @@ +package FLAT::Regex::Parser; +use strict; + +#### Is this one level of abstraction too far? Parser generator generators.. + +#### TODO: try YAPP, since recursive descent is SLOOOW +use Parse::RecDescent; +use FLAT::Regex::Op; + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x; + +sub new { + my $pkg = shift; + my @ops = sort { $a->{prec} <=> $b->{prec} } + map {{ + pkg => "FLAT::Regex::Op::$_", + prec => "FLAT::Regex::Op::$_"->precedence, + spec => "FLAT::Regex::Op::$_"->parse_spec, + short => $_ + }} @_; + + my $lowest = shift @ops; + my $grammar = qq! + parse: + $lowest->{short} /^\\Z/ { \$item[1] } + !; + + my $prev = $lowest; + for (@ops) { + my $spec = sprintf $prev->{spec}, $_->{short}; + + $grammar .= qq! + $prev->{short}: + $spec { $prev->{pkg}\->from_parse(\@item) } + | $_->{short} { \$item[1] } + !; + + $prev = $_; + } + + my $spec = sprintf $prev->{spec}, "atomic"; + $grammar .= qq! + $prev->{short}: + $spec { $prev->{pkg}\->from_parse(\@item) } + | atomic { \$item[1] } + + atomic: + "(" $lowest->{short} ")" { \$item[2] } + | /\$FLAT::Regex::Parser::CHAR/ + { FLAT::Regex::Op::atomic->from_parse(\@item) } + !; + + Parse::RecDescent->new($grammar); +} + +1; + + +__END__ + +original parser: + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x; + +my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die; + parse: + alt /^\Z/ { $item[1] } + alt: + concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) } + | concat { $item[1] } + concat: + star(2..) { FLAT::Regex::Op::concat->from_parse(@item) } + | star { $item[1] } + star : + atomic '*' { FLAT::Regex::Op::star->from_parse(@item) } + | atomic { $item[1] } + atomic: + "(" alt ")" { $item[2] } + | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) } +__EOG__ diff --git a/lib/FLAT/Regex/Transform.pm b/lib/FLAT/Regex/Transform.pm new file mode 100644 index 0000000..cd0cf56 --- /dev/null +++ b/lib/FLAT/Regex/Transform.pm @@ -0,0 +1,18 @@ +package FLAT::Regex::Transform; + +# Extends FLAT::Regex::WithExtraOps with PRegex transformations +# (i.e., reductions based on: w*v & a*b + +use base 'FLAT::Regex::WithExtraOps'; + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new(@_); + return $self; +} + +# Ideally, the transformation should be implemented as an iterator. This +# approach will be finite for shuffles with NO closed strings, but will carry on +# indefinitely for the shuffle of strings where at least one of the strings is closed + +1; diff --git a/lib/FLAT/Regex/Util.pm b/lib/FLAT/Regex/Util.pm new file mode 100644 index 0000000..516ad9f --- /dev/null +++ b/lib/FLAT/Regex/Util.pm @@ -0,0 +1,33 @@ +package FLAT::Regex::Util; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +sub get_symbol { + my @symbols = qw/0 1/; + return $symbols[rand(2)]; +} + +sub get_op { + my @ops = ('*','+','&','','','','','','',''); + return $ops[rand(10)]; +} + +sub get_random { + my $length = shift; + my $string = ''; + if (1 < $length) { + $string = get_symbol().get_op().get_random(--$length); + } else { + $string = get_symbol(); + } + return $string; +} + +sub random_pre { + my $length = ( $_[0] ? $_[0] : 32 ); + return FLAT::Regex::WithExtraOps->new(get_random($length)); +} + +1; diff --git a/lib/FLAT/Regex/WithExtraOps.pm b/lib/FLAT/Regex/WithExtraOps.pm new file mode 100644 index 0000000..b366d7c --- /dev/null +++ b/lib/FLAT/Regex/WithExtraOps.pm @@ -0,0 +1,109 @@ +package FLAT::Regex::WithExtraOps; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star negate shuffle ]); +sub _parser { $PARSER } + +sub members { + my $self = shift; + wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + +#### Precedence +# 30 ::star +# 20 ::concat +# 15 ::negate <---<< WithExtraOps +# 12 ::shuffle <---<< WithExtraOps +# 10 ::alt +# 0 ::atomic + +############################### +package FLAT::Regex::Op::negate; +use base "FLAT::Regex::Op"; +use Carp; + +sub parse_spec { "'~' %s"; } +sub precedence { 15 } # between concat and alternation + +sub as_string { + my ($self, $prec) = @_; + my $result = "~" . $self->members->as_string($self->precedence); + return $prec > $self->precedence ? "($result)" : $result; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( $item[2] ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { + my $self = shift; + my $op = $self->members->reverse; + __PACKAGE__->new($op); +} + +sub is_empty { + croak "Not implemented for negated regexes"; +} + +sub has_nonempty_string { + croak "Not implemented for negated regexes"; +} + +sub is_finite { + croak "Not implemented for negated regexes"; +} + +############################### +package FLAT::Regex::Op::shuffle; +use base 'FLAT::Regex::Op'; +use Carp; + +sub parse_spec { "%s(2.. /[&]/)" } +sub precedence { 12 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "&", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my $self = shift; + croak "Not implemented for shuffled regexes"; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->shuffle( @parts[1..$#parts] ); +} + +# Implement? +sub reverse { + my $self = shift; + croak "Not implemented for shuffled regexes"; +} + +sub is_empty { + croak "Not implemented for shuffled regexes"; +} + +sub has_nonempty_string { + croak "Not implemented for shuffled regexes"; +} + +sub is_finite { + croak "Not implemented for shuffled regexes"; +} |