summaryrefslogtreecommitdiff
path: root/lib/FLAT/Regex.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/FLAT/Regex.pm')
-rw-r--r--lib/FLAT/Regex.pm194
1 files changed, 194 insertions, 0 deletions
diff --git a/lib/FLAT/Regex.pm b/lib/FLAT/Regex.pm
new file mode 100644
index 0000000..2c5c243
--- /dev/null
+++ b/lib/FLAT/Regex.pm
@@ -0,0 +1,194 @@
+package FLAT::Regex;
+use base 'FLAT';
+use strict;
+use Carp;
+
+use FLAT::Regex::Parser;
+use FLAT::Regex::Op;
+
+my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star ]);
+#### TODO: error checking in the parse
+
+sub _parser { $PARSER }
+
+sub new {
+ my ($pkg, $string) = @_;
+ my $result = $pkg->_parser->parse($string)
+ or croak qq[``$string'' is not a valid regular expression];
+
+ $pkg->_from_op( $result );
+}
+
+sub _from_op {
+ my ($proto, $op) = @_;
+ $proto = ref $proto || $proto; ## I really do want this
+
+ bless [ $op ], $proto;
+}
+
+sub op {
+ $_[0][0];
+}
+
+use overload '""' => 'as_string';
+sub as_string {
+ $_[0]->op->as_string(0);
+}
+
+sub as_perl_regex {
+ my ($self, %opts) = @_;
+
+ my $fmt = $opts{anchored} ? '(?:\A%s\z)' : '(?:%s)';
+ return sprintf $fmt, $self->op->as_perl_regex(0);
+}
+
+sub contains {
+ my ($self, $string) = @_;
+ $string =~ $self->as_perl_regex(anchored => 1);
+}
+
+sub as_nfa {
+ $_[0]->op->as_nfa;
+}
+
+sub as_pfa {
+ $_[0]->op->as_pfa;
+}
+
+#### regular language standard interface implementation:
+#### TODO: parameter checking?
+
+sub as_regex {
+ $_[0];
+}
+
+sub union {
+ my $self = $_[0];
+ my $op = FLAT::Regex::op::alt->new( map { $_->as_regex->op } @_ );
+ $self->_from_op($op);
+}
+
+sub intersect {
+ my @dfas = map { $_->as_dfa } @_;
+ my $self = shift @dfas;
+ $self->intersect(@dfas)->as_regex;
+}
+
+sub complement {
+ my $self = shift;
+ $self->as_dfa->complement->as_regex;
+}
+
+sub concat {
+ my $self = $_[0];
+ my $op = FLAT::Regex::op::concat->new( map { $_->as_regex->op } @_ );
+ $self->_from_op($op);
+}
+
+sub kleene {
+ my $self = shift;
+ my $op = FLAT::Regex::op::star->new( $self->op );
+ $self->_from_op($op);
+}
+
+sub reverse {
+ my $self = shift;
+ my $op = $self->op->reverse;
+ $self->_from_op($op);
+}
+
+sub is_empty {
+ $_[0]->op->is_empty;
+}
+
+sub is_finite {
+ $_[0]->op->is_finite;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::Regex - Regular expressions
+
+=head1 SYNOPSIS
+
+A FLAT::Regex object is a regular expression.
+
+=head1 USAGE
+
+In addition to implementing the interface specified in L<FLAT>, FLAT::Regex
+objects provide the following regex-specific methods:
+
+=over
+
+=item FLAT::Regex-E<gt>new($string)
+
+Returns a regex object representing the expression given in $string. C<|>
+and C<+> can both be used to denote alternation. C<*> denotes Kleene star, and
+parentheses can be used for grouping. No other features or shortcut notation
+is currently supported (character classes, {n,m} repetition, etc).
+
+Whitespaces is ignored. To specify a literal space, use C<[ ]>. This syntax
+can also be used to specify atomic "characters" longer than a single
+character. For example, the expression:
+
+ [foo]abc[bar]*
+
+is treated as a regular expression over the symbols "a", "b", "c", "foo",
+and "bar". In particular, this means that when the regular expression is
+reversed, "foo" and "bar" remain the same (i.e, they do not become "oof" and
+"rab").
+
+The empty regular expression (epsilon) is written as C<[]>, and the null
+regular expression (sometimes called phi) is specified with the C<#>
+character. To specify a literal hash-character, use C<[#]>. Including
+literal square bracket characters is currently not supported.
+
+The expression "" (or any string containing only whitespace) is not a valid
+FLAT regex expression. Either C<[]> or C<#> are probably what was intended.
+
+=item $regex-E<gt>as_string
+
+Returns the string representation of the regex, in the same format as above.
+It is NOT necessarily true that
+
+ FLAT::Regex->new($string)->as_string
+
+is identical to $string, especially if $string contains whitespace or
+redundant parentheses.
+
+=item $regex-E<gt>as_perl_regex
+
+=item $regex-E<gt>as_perl_regex(anchored => $bool);
+
+Returns an equivalent Perl regular expression. If the "anchored" option
+is set to a true value, the regular expression will be anchored with
+C<\A> and C<\z>. The default behavior is to omit the anchors.
+
+The Perl regex will not contain capturing parentheses. "Extended" characters
+that are written as "[char]" in FLAT regexes will be written without the
+square brackets in the corresponding Perl regex. So the following:
+
+ FLAT::Regex->new("[foo][bar]*")->as_perl_regex
+
+will be equal to "(?:foo(?:bar)*)".
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.