summaryrefslogtreecommitdiff
path: root/lib/FLAT
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2017-04-03 15:04:15 +0200
committerDaniel Friesel <derf@finalrewind.org>2017-04-03 15:04:15 +0200
commit00e57331b1c7ef2b1f402f41e1223308e0d8ce61 (patch)
tree05e9b4223072582a5a6843de6d9845213a94f341 /lib/FLAT
initial commit
Diffstat (limited to 'lib/FLAT')
-rw-r--r--lib/FLAT/CMD.pm533
-rw-r--r--lib/FLAT/CMD/AcyclicStrings.pm54
-rw-r--r--lib/FLAT/CMD/DFTStrings.pm55
-rw-r--r--lib/FLAT/DFA.pm557
-rw-r--r--lib/FLAT/FA.pm554
-rw-r--r--lib/FLAT/NFA.pm509
-rw-r--r--lib/FLAT/PFA.pm293
-rw-r--r--lib/FLAT/Regex.pm194
-rw-r--r--lib/FLAT/Regex/Op.pm282
-rw-r--r--lib/FLAT/Regex/Parser.pm82
-rw-r--r--lib/FLAT/Regex/Transform.pm18
-rw-r--r--lib/FLAT/Regex/Util.pm33
-rw-r--r--lib/FLAT/Regex/WithExtraOps.pm109
-rw-r--r--lib/FLAT/Symbol.pm98
-rw-r--r--lib/FLAT/Transition.pm66
-rw-r--r--lib/FLAT/XFA.pm3
16 files changed, 3440 insertions, 0 deletions
diff --git a/lib/FLAT/CMD.pm b/lib/FLAT/CMD.pm
new file mode 100644
index 0000000..5bd0e2d
--- /dev/null
+++ b/lib/FLAT/CMD.pm
@@ -0,0 +1,533 @@
+package FLAT::CMD;
+use FLAT;
+use FLAT::Regex;
+use FLAT::NFA;
+use FLAT::DFA;
+use Carp;
+
+=head1 NAME
+
+CMD - Commandline interface for the Formal Language & Automata Toolkit
+
+=head1 SYNOPSIS
+
+CMD.pm is provides an interface to the C<fash> commandline utility that offers
+certain features implemented in FLAT. Consequently, this interface is also
+available using the C<perl -MFLAT::CMD -e func> paradigm, but C<fash> makes
+things a lot more convenient.
+
+=head1 USAGE
+
+All regular language objects in FLAT implement the following methods.
+Specific regular language representations (regex, NFA, DFA) may implement
+additional methods that are outlined in the repsective POD pages.
+
+=cut
+
+# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file
+use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter';
+use vars qw(@EXPORT $AUTOLOAD);
+
+@EXPORT = qw(compare dump dfa2gv nfa2gv pfa2gv dfa2undgv nfa2undgv pfa2undgv dfa2digraph
+ nfa2digraph pfa2digraph dfa2undirected nfa2undirected pfa2undirected random_pre random_re
+ savedfa test help
+ );
+
+sub AUTOLOAD {
+ my($l) = $AUTOLOAD;
+ $l =~ s/.*:://;
+ my(%EXPORT);
+ @EXPORT{@EXPORT} = '';
+ if (exists $EXPORT{$l}){
+ FLAT::CMD->$l(@_);
+ }
+}
+
+sub help {
+print <<END
+__________ .__ ___________.____ ___________
+\______ \ ___________| | \_ _____/| | _____\__ ___/
+ | ___// __ \_ __ \ | | __) | | \__ \ | |
+ | | \ ___/| | \/ |__ | \ | |___ / __ \| |
+ |____| \___ >__| |____/ \___ / |_______ (____ /____|
+ \/ \/ \/ \/
+
+ Everything is wrt parallel regular expressions, i.e.,
+ with the addtional shuffle operator, "&". All this
+ means is that you can use the ambersand (&) as a symbol
+ in the regular expressions you submit because it will be
+ detected as an operator.That said, if you avoid using
+ the "&" operator, you can forget about all that shuffle
+ business.
+
+%perl -MFLAT::CMD -e
+ "somestrings" 're1' # creates all valid strings via acyclic path, no cycles yet
+ "compare 're1','re2'" # comares 2 regexs | see note [2]
+ "dump 're1'" # dumps parse trees | see note[1]
+ "dfa2gv 're1'" # dumps graphviz digraph desc | see note[1]
+ "nfa2gv 're1'" # dumps graphviz digraph desc | see note[1]
+ "pfa2gv 're1'" # dumps graphviz digraph desc | see note[1]
+ "dfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1]
+ "nfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1]
+ "pfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1]
+ "dfa2digraph 're1'" # dumps directed graph without transitions
+ "nfa2digraph 're1'" # dumps directed graph without transitions
+ "pfa2digraph 're1'" # dumps directed graph without transitions
+ "dfa2undirected 're1'" # dumps undirected graph without transitions
+ "nfa2undirected 're1'" # dumps undirected graph without transitions
+ "pfa2undirected 're1'" # dumps undirected graph without transitions
+ random_pre
+ random_re
+ "savedfa 're1'" # converts PRE to min dfa, then serializes to disk
+ "test 'regex' 'string1'" # give a regex, reports if subsequent strings are valid
+ help
+
+NOTES:
+[1] This means you could presumably do something like the following:
+ %perl -MFLAT -e command < text_file_with_1_regex_per_line.txt
+ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[2] This command compares the minimal DFAs of each regular expression;
+ if there exists a exact 1-1 mapping of symbols, states, and
+ transitions then the DFAs are considered equal. This means that
+ "abc" will be equal to "def" To make matters more confusing, "ab+ac"
+ would be equivalent to "xy+xz"; or worse yet, "z(x+y)". So to the
+ 'compare' command, "ab+ac" == "xy+xz" == "z(x+y)". This however
+ does not translate into the situation where "ab+ac" will accept
+ the same LITERAL strings as "z(x+y)" because the symbols are obviously
+ different.
+
+CREDITS:
+Blockhead, CPAN.pm (for the example of how to implement these one liners),
+and #perl on irc.freenode.net for pointing out something I missed when
+trying to copy CPAN one liner majik.
+
+Perl FLAT and all included modules are released under the same terms as Perl
+itself. Cheers.
+
+SEE:
+http://www.0x743.com/flat
+
+END
+}
+
+# save to a dat file
+sub savedfa {
+ my $PRE = shift;
+ # neat a better way to get input via stdin
+ if (!$PRE) {
+ while (<>) {
+ chomp;
+ $PRE = $_;
+ last;
+ }
+ }
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ use FLAT::NFA;
+ use FLAT::DFA;
+ use Storable;
+ # caches results, loads them in if detexted
+ my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;
+ store $dfa, "$PRE.dat";
+}
+
+# dumps directed graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')"
+sub test {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ use FLAT::NFA;
+ use FLAT::DFA;
+ # handles multiple strings; first is considered the regex
+ if (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new(shift @_)->as_pfa()->as_nfa->as_dfa();
+ foreach (@_)
+ { if ($FA->is_valid_string($_)) {
+ print "(+): $_\n";
+ } else {
+ print "(-): $_\n";
+ }
+ }
+ } else {
+ my $FA;
+ while (<STDIN>) {
+ chomp;
+ if ($. == 1) { #<-- uses first line as regex!
+ $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa->as_dfa();
+ } else {
+ if ($FA->is_valid_string($_)) {
+ print "(+): $_\n";
+ } else {
+ print "(-): $_\n";
+ }
+ }
+ }
+ }
+}
+
+# dumps parse tree
+# Usage:
+# perl -MFLAT -e "dump('re1','re2',...,'reN')"
+# perl -MFLAT -e dump < list_of_regexes.dat
+sub dump {
+ use FLAT::Regex::WithExtraOps;
+ use Data::Dumper;
+ if (@_)
+ { foreach (@_)
+ { my $PRE = FLAT::Regex::WithExtraOps->new($_);
+ print Dumper($PRE); }}
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $PRE = FLAT::Regex::WithExtraOps->new($_);
+ print Dumper($PRE); }
+ }
+}
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "dfa2gv('a&b&c&d*e*')"
+sub dfa2gv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks();
+ print $FA->as_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks();
+ print $FA->as_graphviz;}
+ }
+}
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "nfa2gv('a&b&c&d*e*')"
+sub nfa2gv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_graphviz;}
+ }
+}
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "pfa2gv('a&b&c&d*e*')"
+sub pfa2gv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_graphviz;}
+ }
+}
+
+#as_undirected_graphviz
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "dfa2undgv('a&b&c&d*e*')"
+sub dfa2undgv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks();
+ print $FA->as_undirected_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks();
+ print $FA->as_undirected_graphviz;}
+ }
+}
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "nfa2undgv('a&b&c&d*e*')"
+sub nfa2undgv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_undirected_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_undirected_graphviz;}
+ }
+}
+
+# dumps graphviz notation
+# Usage:
+# perl -MFLAT -e "pfa2undgv('a&b&c&d*e*')"
+sub pfa2undgv {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_undirected_graphviz;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_undirected_graphviz;}
+ }
+}
+
+# dumps directed graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "dfa2directed('a&b&c&d*e*')"
+sub dfa2digraph {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ # trims sink states from min-dfa since transitions are gone
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();
+ print $FA->as_digraph;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();
+ print $FA->as_digraph;}
+ }
+ print "\n";
+}
+
+# dumps directed graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "nfa2directed('a&b&c&d*e*')"
+sub nfa2digraph {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_digraph;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_digraph;}
+ }
+ print "\n";
+}
+
+# dumps directed graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')"
+sub pfa2digraph {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_digraph;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_digraph;}
+ }
+ print "\n";
+}
+
+# dumps undirected graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "dfa2undirected('a&b&c&d*e*')"
+sub dfa2undirected {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ # trims sink states from min-dfa since transitions are gone
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();
+ print $FA->as_undirected;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();
+ print $FA->as_undirected;}
+ }
+ print "\n";
+}
+
+# dumps undirected graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "nfa2undirected('a&b&c&d*e*')"
+sub nfa2undirected {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_undirected;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();
+ print $FA->as_undirected;}
+ }
+ print "\n";
+}
+
+# dumps undirected graph using Kundu notation
+# Usage:
+# perl -MFLAT -e "pfa2undirected('a&b&c&d*e*')"
+sub pfa2undirected {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::PFA;
+ if (@_)
+ { foreach (@_)
+ { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_undirected;} }
+ else
+ { while (<STDIN>)
+ { chomp;
+ my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();
+ print $FA->as_undirected;}
+ }
+ print "\n";
+}
+
+# compares 2 give PREs
+# Usage:
+# perl -MFLAT -e "compare('a','a&b&c&d*e*')" #<-- no match, btw
+sub compare {
+ use FLAT::Regex::WithExtraOps;
+ use FLAT::DFA;
+ use FLAT::PFA;
+ my $PFA1 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa();
+ my $PFA2 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa();
+ my $DFA1 = $PFA1->as_nfa->as_min_dfa;
+ my $DFA2 = $PFA2->as_nfa->as_min_dfa;
+ if ($DFA1->equals($DFA2)) {
+ print "Yes\n";
+ } else {
+ print "No\n";
+ }
+}
+
+# prints random PRE
+# Usage:
+# perl -MFLAT -e random_pre
+sub random_pre {
+ my $and_chance = shift;
+ # skirt around deep recursion warning annoyance
+ local $SIG{__WARN__} = sub { $_[0] =~ /^Deep recursion/ or warn $_[0] };
+ srand $$;
+ my %CMDLINEOPTS = ();
+ # Percent chance of each operator occuring
+ $CMDLINEOPTS{LENGTH} = 32;
+ $CMDLINEOPTS{OR} = 6;
+ $CMDLINEOPTS{STAR} = 10;
+ $CMDLINEOPTS{OPEN} = 5;
+ $CMDLINEOPTS{CLOSE} = 0;
+ $CMDLINEOPTS{n} = 1;
+ $CMDLINEOPTS{AND} = 10; #<-- default
+ $CMDLINEOPTS{AND} = $and_chance if ($and_chance == 0); #<-- to make it just an re (no shuffle)
+
+
+ my $getRandomChar = sub {
+ my $ch = '';
+ # Get a random character between 0 and 127.
+ do {
+ $ch = int(rand 2);
+ } while ($ch !~ m/[a-zA-Z0-9]/);
+ return $ch;
+ };
+
+ my $getRandomRE = sub {
+ my $str = '';
+ my @closeparens = ();
+ for (1..$CMDLINEOPTS{LENGTH}) {
+ $str .= $getRandomChar->();
+ # % chance of an "or"
+ if (int(rand 100) < $CMDLINEOPTS{OR}) {
+ $str .= "|1";
+ } elsif (int(rand 100) < $CMDLINEOPTS{AND}) {
+ $str .= "&0";
+ } elsif (int(rand 100) < $CMDLINEOPTS{STAR}) {
+ $str .= "*1";
+ } elsif (int(rand 100) < $CMDLINEOPTS{OPEN}) {
+ $str .= "(";
+ push(@closeparens,'0101)');
+ } elsif (int(rand 100) < $CMDLINEOPTS{CLOSE} && @closeparens) {
+ $str .= pop(@closeparens);
+ }
+ }
+ # empty out @closeparens if there are still some left
+ if (@closeparens) {
+ $str .= join('',@closeparens);
+ }
+ return $str;
+ };
+
+ for (1..$CMDLINEOPTS{n}) {
+ print $getRandomRE->(),"\n";
+ }
+}
+
+# prints random RE (no & operator)
+# Usage:
+# perl -MFLAT -e random_re
+sub random_re {
+ shift->random_pre(0);
+}
+
+1;
+
+__END__
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
diff --git a/lib/FLAT/CMD/AcyclicStrings.pm b/lib/FLAT/CMD/AcyclicStrings.pm
new file mode 100644
index 0000000..ebc8840
--- /dev/null
+++ b/lib/FLAT/CMD/AcyclicStrings.pm
@@ -0,0 +1,54 @@
+# all strings available via acyclic path from the DFA start state to any all of the final states
+
+package FLAT::CMD::AcyclicStrings;
+use base 'FLAT::CMD';
+use FLAT;
+use FLAT::Regex::WithExtraOps;
+use FLAT::PFA;
+use FLAT::NFA;
+use FLAT::DFA;
+use Storable;
+use Carp;
+
+# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file
+use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter';
+use vars qw(@EXPORT $AUTOLOAD);
+
+@EXPORT = qw(as_strings);
+
+sub AUTOLOAD {
+ my($l) = $AUTOLOAD;
+ $l =~ s/.*:://;
+ my(%EXPORT);
+ @EXPORT{@EXPORT} = '';
+ if (exists $EXPORT{$l}){
+ FLAT::CMD->$l(@_);
+ }
+}
+
+use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa);
+# acyclic - no cycles
+sub as_strings {
+ my $PRE = shift;
+ # neat a better way to get input via stdin
+ if (!$PRE) {
+ while (<>) {
+ chomp;
+ $PRE = $_;
+ last;
+ }
+ }
+ # caches results, loads them in if detexted
+ my $RE = FLAT::Regex::WithExtraOps->new($PRE);
+ printf("%s\n",$RE->as_string());
+ if (!-e "$PRE.dat") {
+ $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;
+ #store $dfa, "$PRE.dat";
+ } else {
+ print STDERR "$PRE.dat found..";
+ $dfa = retrieve "$PRE.dat";
+ }
+ $dfa->as_acyclic_strings();
+}
+
+1;
diff --git a/lib/FLAT/CMD/DFTStrings.pm b/lib/FLAT/CMD/DFTStrings.pm
new file mode 100644
index 0000000..9dc5a59
--- /dev/null
+++ b/lib/FLAT/CMD/DFTStrings.pm
@@ -0,0 +1,55 @@
+# all strings available via depth first traversal, including back edges that happen to land on an accepting state
+
+package FLAT::CMD::DFTStrings;
+use base 'FLAT::CMD';
+use FLAT;
+use FLAT::Regex::WithExtraOps;
+use FLAT::PFA;
+use FLAT::NFA;
+use FLAT::DFA;
+use Storable;
+use Carp;
+
+# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file
+use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter';
+use vars qw(@EXPORT $AUTOLOAD);
+
+@EXPORT = qw(as_strings);
+
+sub AUTOLOAD {
+ my($l) = $AUTOLOAD;
+ $l =~ s/.*:://;
+ my(%EXPORT);
+ @EXPORT{@EXPORT} = '';
+ if (exists $EXPORT{$l}){
+ FLAT::CMD->$l(@_);
+ }
+}
+
+use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa);
+# acyclic - no cycles
+sub as_strings {
+ my $PRE = shift;
+ # neat a better way to get input via stdin
+ if (!$PRE) {
+ while (<>) {
+ chomp;
+ $PRE = $_;
+ last;
+ }
+ }
+ # caches results, loads them in if detexted
+ my $RE = FLAT::Regex::WithExtraOps->new($PRE);
+ printf("%s\n",$RE->as_string());
+ if (!-e "$PRE.dat") {
+ $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;
+ #store $dfa, "$PRE.dat";
+ } else {
+ print STDERR "$PRE.dat found..";
+ $dfa = retrieve "$PRE.dat";
+ }
+
+ $dfa->as_dft_strings(shift);
+}
+
+1;
diff --git a/lib/FLAT/DFA.pm b/lib/FLAT/DFA.pm
new file mode 100644
index 0000000..f96c9fb
--- /dev/null
+++ b/lib/FLAT/DFA.pm
@@ -0,0 +1,557 @@
+package FLAT::DFA;
+
+use strict;
+use base 'FLAT::NFA';
+use Storable qw(dclone);
+use Carp;
+$|++;
+
+sub set_starting {
+ my $self = shift;
+ $self->SUPER::set_starting(@_);
+
+ my $num = () = $self->get_starting;
+ confess "DFA must have exactly one starting state"
+ if $num != 1;
+}
+
+sub complement {
+ my $self = $_[0]->clone;
+
+ for my $s ($self->get_states) {
+ $self->is_accepting($s)
+ ? $self->unset_accepting($s)
+ : $self->set_accepting($s);
+ }
+
+ return $self;
+}
+
+sub _TUPLE_ID { join "\0", @_ }
+sub _uniq { my %seen; grep { !$seen{$_}++ } @_; }
+
+## this method still needs more work..
+sub intersect {
+ my @dfas = map { $_->as_dfa } @_;
+
+ my $return = FLAT::DFA->new;
+ my %newstates;
+ my @alpha = _uniq( map { $_->alphabet } @dfas );
+
+ $_->_extend_alphabet(@alpha) for @dfas;
+
+ my @start = map { $_->get_starting } @dfas;
+ my $start = $newstates{ _TUPLE_ID(@start) } = $return->add_states(1);
+ $return->set_starting($start);
+ $return->set_accepting($start)
+ if ! grep { ! $dfas[$_]->is_accepting( $start[$_] ) } 0 .. $#dfas;
+
+ my @queue = (\@start);
+ while (@queue) {
+ my @tuple = @{ shift @queue };
+
+ for my $char (@alpha) {
+ my @next = map { $dfas[$_]->successors( $tuple[$_], $char ) }
+ 0 .. $#dfas;
+
+ #warn "[@tuple] --> [@next] via $char\n";
+
+ if (not exists $newstates{ _TUPLE_ID(@next) }) {
+ my $s = $newstates{ _TUPLE_ID(@next) } = $return->add_states(1);
+ $return->set_accepting($s)
+ if ! grep { ! $dfas[$_]->is_accepting( $next[$_] ) } 0 .. $#dfas;
+ push @queue, \@next;
+ }
+
+ $return->add_transition( $newstates{ _TUPLE_ID(@tuple) },
+ $newstates{ _TUPLE_ID(@next) },
+ $char );
+ }
+ }
+
+ return $return;
+}
+
+# this is meant to enforce 1 starting state for a DFA, but it is getting us into trouble
+# when a DFA object calls unset_starting
+sub unset_starting {
+ my $self = shift;
+ $self->SUPER::unset_starting(@_);
+ my $num = () = $self->unset_starting;
+ croak "DFA must have exactly one starting state"
+ if $num != 1;
+}
+
+#### transformations
+
+sub trim_sinks {
+ my $self = shift;
+ my $result = $self->clone();
+ foreach my $state ($self->array_complement([$self->get_states()],[$self->get_accepting()])) {
+ my @ret = $self->successors($state,[$self->alphabet]);
+ if (@ret) {
+ if ($ret[0] == $state) {
+ $result->delete_states($state) if ($result->is_state($state));
+ }
+ }
+ }
+ return $result;
+}
+
+sub as_min_dfa {
+
+ my $self = shift()->clone;
+ my $N = $self->num_states;
+ my @alphabet = $self->alphabet;
+
+ my ($start) = $self->get_starting;
+ my %final = map { $_ => 1 } $self->get_accepting;
+
+ my @equiv = map [ (0) x ($_+1), (1) x ($N-$_-1) ], 0 .. $N-1;
+
+ while (1) {
+ my $changed = 0;
+ for my $s1 (0 .. $N-1) {
+ for my $s2 (grep { $equiv[$s1][$_] } 0 .. $N-1) {
+
+ if ( 1 == grep defined, @final{$s1, $s2} ) {
+ $changed = 1;
+ $equiv[$s1][$s2] = 0;
+ next;
+ }
+
+ for my $char (@alphabet) {
+ my @t = sort { $a <=> $b } $self->successors([$s1,$s2], $char);
+ next if @t == 1;
+
+ if (not $equiv[ $t[0] ][ $t[1] ]) {
+ $changed = 1;
+ $equiv[$s1][$s2] = 0;
+ }
+ }
+ }}
+
+ last if !$changed;
+ }
+ my $result = (ref $self)->new;
+ my %newstate;
+ my @classes;
+ for my $s (0 .. $N-1) {
+ next if exists $newstate{$s};
+
+ my @c = ( $s, grep { $equiv[$s][$_] } 0 .. $N-1 );
+ push @classes, \@c;
+
+ @newstate{@c} = ( $result->add_states(1) ) x @c;
+ }
+
+ for my $c (@classes) {
+ my $s = $c->[0];
+ for my $char (@alphabet) {
+ my ($next) = $self->successors($s, $char);
+ $result->add_transition( $newstate{$s}, $newstate{$next}, $char );
+ }
+ }
+
+ $result->set_starting( $newstate{$start} );
+ $result->set_accepting( $newstate{$_} )
+ for $self->get_accepting;
+
+ $result;
+
+}
+
+# the validity of a given string <-- executes symbols over DFA
+# if there is not transition for given state and symbol, it fails immediately
+# if the current state we're in is not final when symbols are exhausted, then it fails
+
+sub is_valid_string {
+ my $self = shift;
+ my $string = shift;
+ chomp $string;
+ my $OK = undef;
+ my @stack = split('',$string);
+ # this is confusing all funcs return arrays
+ my @current = $self->get_starting();
+ my $current = pop @current;
+ foreach (@stack) {
+ my @next = $self->successors($current,$_);
+ if (!@next) {
+ return $OK; #<--returns undef bc no transition found
+ }
+ $current = $next[0];
+ }
+ $OK++ if ($self->is_accepting($current));
+ return $OK;
+}
+
+#
+# Experimental!!
+#
+
+# DFT stuff in preparation for DFA pump stuff;
+sub as_node_list {
+ my $self = shift;
+ my %node = ();
+ for my $s1 ($self->get_states) {
+ $node{$s1} = {}; # initialize
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+ if (defined $t) {
+ # array of symbols that $s1 will go to $s2 on...
+ push(@{$node{$s1}{$s2}},split(',',$t->as_string));
+ }
+ }
+ }
+ return %node;
+}
+
+sub as_acyclic_strings {
+ my $self = shift;
+ my %dflabel = (); # lookup table for dflable
+ my %backtracked = (); # lookup table for backtracked edges
+ my $lastDFLabel = 0;
+ my @string = ();
+ my %nodes = $self->as_node_list();
+ # output format is the actual PRE followed by all found strings
+ $self->acyclic($self->get_starting(),\%dflabel,$lastDFLabel,\%nodes,\@string);
+}
+
+sub acyclic {
+ my $self = shift;
+ my $startNode = shift;
+ my $dflabel_ref = shift;
+ my $lastDFLabel = shift;
+ my $nodes = shift;
+ my $string = shift;
+ # tree edge detection
+ if (!exists($dflabel_ref->{$startNode})) {
+ $dflabel_ref->{$startNode} = ++$lastDFLabel; # the order inwhich this link was explored
+ foreach my $adjacent (keys(%{$nodes->{$startNode}})) {
+ if (!exists($dflabel_ref->{$adjacent})) { # initial tree edge
+ foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) {
+ push(@{$string},$symbol);
+ $self->acyclic($adjacent,\%{$dflabel_ref},$lastDFLabel,\%{$nodes},\@{$string});
+ if ($self->array_is_subset([$adjacent],[$self->get_accepting()])) { #< proof of concept
+ printf("%s\n",join('',@{$string}));
+ }
+ pop(@{$string});
+ }
+ }
+ }
+ }
+ # remove startNode entry to facilitate acyclic path determination
+ delete($dflabel_ref->{$startNode});
+ #$lastDFLabel--;
+ return;
+};
+
+sub as_dft_strings {
+ my $self = shift;
+ my $depth = 1;
+ $depth = shift if (1 < $_[0]);
+ my %dflabel = (); # scoped lookup table for dflable
+ my %nodes = $self->as_node_list();
+ foreach (keys(%nodes)) {
+ $dflabel{$_} = []; # initialize container (array) for multiple dflables for each node
+ }
+ my $lastDFLabel = 0;
+ my @string = ();
+ $self->dft($self->get_starting(),[$self->get_accepting()],\%dflabel,$lastDFLabel,\%nodes,\@string,$depth);
+}
+
+sub dft {
+ my $self = shift;
+ my $startNode = shift;
+ my $goals_ref = shift;
+ my $dflabel_ref = shift;
+ my $lastDFLabel = shift;
+ my $nodes = shift;
+ my $string = shift;
+ my $DEPTH = shift;
+ # add start node to path
+ my $c1 = @{$dflabel_ref->{$startNode}}; # get number of elements
+ if ($DEPTH >= $c1) {
+ push(@{$dflabel_ref->{$startNode}},++$lastDFLabel);
+ foreach my $adjacent (keys(%{$nodes->{$startNode}})) {
+ my $c2 = @{$dflabel_ref->{$adjacent}};
+ if ($DEPTH > $c2) { # "initial" tree edge
+ foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) {
+ push(@{$string},$symbol);
+ $self->dft($adjacent,[@{$goals_ref}],$dflabel_ref,$lastDFLabel,$nodes,[@{$string}],$DEPTH);
+ # assumes some base path found
+ if ($self->array_is_subset([$adjacent],[@{$goals_ref}])) {
+ printf("%s\n",join('',@{$string}));
+ }
+ pop(@{$string});
+ }
+ }
+ } # remove startNode entry to facilitate acyclic path determination
+ pop(@{$dflabel_ref->{$startNode}});
+ $lastDFLabel--;
+ }
+};
+
+#
+# String gen using iterators (still experimental)
+#
+
+sub get_acyclic_sub {
+ my $self = shift;
+ my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel) = @_;
+ my @ret = ();
+ foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) {
+ $lastDFLabel++;
+ if (!exists($dflabel_ref->{$adjacent})) {
+ $dflabel_ref->{$adjacent} = $lastDFLabel;
+ foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) {
+ push(@{$string_ref},$symbol);
+ my $string_clone = dclone($string_ref);
+ my $dflabel_clone = dclone($dflabel_ref);
+ push(@ret,sub { return $self->get_acyclic_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel); });
+ pop @{$string_ref};
+ }
+ }
+
+ }
+ return {substack=>[@ret],
+ lastDFLabel=>$lastDFLabel,
+ string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)};
+}
+sub init_acyclic_iterator {
+ my $self = shift;
+ my %dflabel = ();
+ my @string = ();
+ my $lastDFLabel = 0;
+ my %nodelist = $self->as_node_list();
+ my @accepting = $self->get_accepting();
+ # initialize
+ my @substack = ();
+ my $r = $self->get_acyclic_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel);
+ push(@substack,@{$r->{substack}});
+ return sub {
+ while (1) {
+ if (!@substack) {
+ return undef;
+ }
+ my $s = pop @substack;
+ my $r = $s->();
+ push(@substack,@{$r->{substack}});
+ if ($r->{string}) {
+ return $r->{string};
+ }
+ }
+ }
+}
+
+sub new_acyclic_string_generator {
+ my $self = shift;
+ return $self->init_acyclic_iterator();
+}
+
+sub get_deepdft_sub {
+ my $self = shift;
+ my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel,$max) = @_;
+ my @ret = ();
+ my $c1 = @{$dflabel_ref->{$start}};
+ if ($c1 < $max) {
+ push(@{$dflabel_ref->{$start}},++$lastDFLabel);
+ foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) {
+ my $c2 = @{$dflabel_ref->{$adjacent}};
+ if ($c2 < $max) {
+ foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) {
+ push(@{$string_ref},$symbol);
+ my $string_clone = dclone($string_ref);
+ my $dflabel_clone = dclone($dflabel_ref);
+ push(@ret,sub { return $self->get_deepdft_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel,$max); });
+ pop @{$string_ref};
+ }
+ }
+ }
+ }
+ return {substack=>[@ret], lastDFLabel=>$lastDFLabel, string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)};
+}
+
+sub init_deepdft_iterator {
+ my $self = shift;
+ my $MAXLEVEL = shift;
+ my %dflabel = ();
+ my @string = ();
+ my $lastDFLabel = 0;
+ my %nodelist = $self->as_node_list();
+ foreach my $node (keys(%nodelist)) {
+ $dflabel{$node} = []; # initializes anonymous arrays for all nodes
+ }
+ my @accepting = $self->get_accepting();
+ # initialize
+ my @substack = ();
+ my $r = $self->get_deepdft_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel,$MAXLEVEL);
+ push(@substack,@{$r->{substack}});
+ return sub {
+ while (1) {
+ if (!@substack) {
+ return undef;
+ }
+ my $s = pop @substack;
+ my $r = $s->();
+ push(@substack,@{$r->{substack}});
+ if ($r->{string}) {
+ return $r->{string};
+ }
+ }
+ }
+}
+
+sub new_deepdft_string_generator {
+ my $self = shift;
+ my $MAXLEVEL = (@_ ? shift : 1);
+ return $self->init_deepdft_iterator($MAXLEVEL);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::DFA - Deterministic finite automata
+
+=head1 SYNOPSIS
+
+A FLAT::DFA object is a finite automata whose transitions are labeled
+with single characters. Furthermore, each state has exactly one outgoing
+transition for each available label/character.
+
+=head1 USAGE
+
+In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>,
+FLAT::DFA objects provide the following DFA-specific methods:
+
+=over
+
+=item $dfa-E<gt>unset_starting
+
+Because a DFA, by definition, must have only ONE starting state, this allows one to unset
+the current start state so that a new one may be set.
+
+=item $dfa-E<gt>trim_sinks
+
+This method returns a FLAT::DFA (though in theory an NFA) that is lacking a transition for
+all symbols from all states. This method eliminates all transitions from all states that lead
+to a sink state; it also eliminates the sink state.
+
+This has no affect on testing if a string is valid using C<FLAT::DFA::is_valid_string>,
+discussed below.
+
+=item $dfa-E<gt>as_min_dfa
+
+This method minimizes the number of states and transitions in the given DFA. The modifies
+the current/calling DFA object.
+
+=item $dfa-E<gt>is_valid_string($string)
+
+This method tests if the given string is accepted by the DFA.
+
+=item $dfa-E<gt>as_node_list
+
+This method returns a node list in the form of a hash. This node list may be viewed as a
+pure digraph, and is lacking in state names and transition symbols.
+
+=item $dfa-E<gt>as_acyclic_strings
+
+The method is B<deprecated>, and it is suggested that one not use it. It returns all
+valid strings accepted by the DFA by exploring all acyclic paths that go from the start
+state and end in an accepting state. The issue with this method is that it finds and
+returns all strings at once. The iterator described below is much more ideal for actual
+use in an application.
+
+=item $dfa-E<gt>as_dft_strings($depth)
+
+The method is B<deprecated>, and it is suggested that one not use it. It returns all
+valid strings accepted by the DFA using a depth first traversal. A valid string is formed
+when the traversal detects an accepting state, whether it is a terminal node or a node reached
+via a back edge. The issue with this method is that it finds and returns all strings at once.
+The iterator described below is much more ideal for actual use in an application.
+
+The argument, C<$depth> specifies how many times the traversal may actually pass through
+a previously visited node. It is therefore possible to safely explore DFAs that accept
+infinite languages.
+
+=item $dfa-E<gt>new_acyclic_string_generator
+
+This allows one to initialize an iterator that returns a valid string on each successive
+call of the sub-ref that is returned. It returns all valid strings accepted by the DFA by
+exploring all acyclic paths that go from the start state and end in an accepting state.
+
+Example:
+
+ #!/usr/bin/env perl
+ use strict;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ use FLAT::Regex::WithExtraOps;
+
+ my $PRE = "abc&(def)*";
+ my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;
+ my $next = $dfa->new_acyclic_string_generator;
+ print "PRE: $PRE\n";
+ print "Acyclic:\n";
+ while (my $string = $next->()) {
+ print " $string\n";
+ }
+
+=item $dfa-E<gt>new_deepdft_string_generator($depth)
+
+This allows one to initialize an iterator that returns a valid string on each successive
+call of the sub-ref that is returned. It returns all valid strings accepted by the DFA using a
+depth first traversal. A valid string is formed when the traversal detects an accepting state,
+whether it is a terminal node or a node reached via a back edge.
+
+The argument, C<$depth> specifies how many times the traversal may actually pass through
+a previously visited node. It is therefore possible to safely explore DFAs that accept
+infinite languages.
+
+ #!/usr/bin/env perl
+ use strict;
+ use FLAT::DFA;
+ use FLAT::NFA;
+ use FLAT::PFA;
+ use FLAT::Regex::WithExtraOps;
+
+ my $PRE = "abc&(def)*";
+ my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;
+ my $next = $dfa->new_deepdft_string_generator();
+ print "Deep DFT (default):\n";
+ for (1..10) {
+ while (my $string = $next->()) {
+ print " $string\n";
+ last;
+ }
+ }
+
+ $next = $dfa->new_deepdft_string_generator(5);
+ print "Deep DFT (5):\n";
+ for (1..10) {
+ while (my $string = $next->()) {
+ print " $string\n";
+ last;
+ }
+ }
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/FA.pm b/lib/FLAT/FA.pm
new file mode 100644
index 0000000..dd77f50
--- /dev/null
+++ b/lib/FLAT/FA.pm
@@ -0,0 +1,554 @@
+package FLAT::FA;
+
+use strict;
+use base 'FLAT';
+use Carp;
+
+use FLAT::Transition;
+
+=head1 NAME
+
+FLAT::FA - Base class for regular finite automata
+
+=head1 SYNOPSIS
+
+A FLAT::FA object is a collection of states and transitions. Each state
+may be labeled as starting or accepting. Each transition between states
+is labeled with a transition object.
+
+=head1 USAGE
+
+FLAT::FA is a superclass that is not intended to be used directly. However,
+it does provide the following methods:
+
+=cut
+
+sub new {
+ my $pkg = shift;
+ bless {
+ STATES => [],
+ TRANS => [],
+ ALPHA => {}
+ }, $pkg;
+}
+
+sub get_states {
+ my $self = shift;
+ return 0 .. ($self->num_states - 1);
+}
+
+sub num_states {
+ my $self = shift;
+ return scalar @{ $self->{STATES} };
+}
+
+sub is_state {
+ my ($self, $state) = @_;
+ exists $self->{STATES}->[$state];
+}
+
+sub _assert_states {
+ my ($self, @states) = @_;
+ for (@states) {
+ croak "'$_' is not a state" if not $self->is_state($_);
+ }
+}
+sub _assert_non_states {
+ my ($self, @states) = @_;
+ for (@states) {
+ croak "There is already a state called '$_'" if $self->is_state($_);
+ }
+}
+
+sub delete_states {
+ my ($self, @states) = @_;
+
+ $self->_assert_states(@states);
+
+ for my $s ( sort { $b <=> $a } @states ) {
+ $self->_decr_alphabet($_)
+ for @{ splice @{ $self->{TRANS} }, $s, 1 };
+
+ $self->_decr_alphabet( splice @$_, $s, 1 )
+ for @{ $self->{TRANS} };
+
+ splice @{ $self->{STATES} }, $s, 1;
+ }
+}
+
+sub add_states {
+ my ($self, $num) = @_;
+ my $id = $self->num_states;
+
+ for my $s ( $id .. ($id+$num-1) ) {
+ push @$_, undef for @{ $self->{TRANS} };
+ push @{ $self->{TRANS} }, [ (undef) x ($s+1) ];
+ push @{ $self->{STATES} }, {
+ starting => 0,
+ accepting => 0
+ };
+ }
+
+ return wantarray ? ($id .. ($id+$num-1))
+ : $id+$num-1;
+}
+
+##############
+
+sub is_starting {
+ my ($self, $state) = @_;
+ $self->_assert_states($state);
+ return $self->{STATES}[$state]{starting};
+}
+sub set_starting {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{starting} = 1 for @states;
+}
+sub unset_starting {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{starting} = 0 for @states;
+}
+sub get_starting {
+ my $self = shift;
+ return grep { $self->is_starting($_) } $self->get_states;
+}
+
+##############
+
+sub is_accepting {
+ my ($self, $state) = @_;
+ $self->_assert_states($state);
+ return $self->{STATES}[$state]{accepting};
+}
+sub set_accepting {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{accepting} = 1 for @states;
+}
+sub unset_accepting {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{accepting} = 0 for @states;
+}
+sub get_accepting {
+ my $self = shift;
+ return grep { $self->is_accepting($_) } $self->get_states;
+}
+
+###############
+
+sub _decr_alphabet {
+ my ($self, $t) = @_;
+ return if not defined $t;
+ for ($t->alphabet) {
+ delete $self->{ALPHA}{$_} if not --$self->{ALPHA}{$_};
+ }
+}
+sub _incr_alphabet {
+ my ($self, $t) = @_;
+ return if not defined $t;
+ $self->{ALPHA}{$_}++ for $t->alphabet;
+}
+
+sub set_transition {
+ my ($self, $state1, $state2, @label) = @_;
+ $self->remove_transition($state1, $state2);
+
+ @label = grep defined, @label;
+ return if not @label;
+
+ my $t = $self->{TRANS_CLASS}->new(@label);
+ $self->_incr_alphabet($t);
+
+ $self->{TRANS}[$state1][$state2] = $t;
+}
+
+sub add_transition {
+ my ($self, $state1, $state2, @label) = @_;
+
+ @label = grep defined, @label;
+ return if not @label;
+
+ my $t = $self->get_transition($state1, $state2);
+ $self->_decr_alphabet($t);
+
+ if (!$t) {
+ $t = $self->{TRANS}[$state1][$state2] = $self->{TRANS_CLASS}->new;
+ }
+
+ $t->add(@label);
+ $self->_incr_alphabet($t);
+}
+
+sub get_transition {
+ my ($self, $state1, $state2) = @_;
+ $self->_assert_states($state1, $state2);
+
+ $self->{TRANS}[$state1][$state2];
+}
+
+sub remove_transition {
+ my ($self, $state1, $state2) = @_;
+
+ $self->_decr_alphabet( $self->{TRANS}[$state1][$state2] );
+ $self->{TRANS}[$state1][$state2] = undef;
+}
+
+# given a state and a symbol, it tells you
+# what the next state(s) are; do get successors
+# for find the successors for a set of symbols,
+# use array refs. For example:
+# @NEXT=$self->successors([@nodes],[@symbols]);
+sub successors {
+ my ($self, $state, $symb) = @_;
+
+ my @states = ref $state eq 'ARRAY' ? @$state : ($state);
+ my @symbs = defined $symb
+ ? (ref $symb eq 'ARRAY' ? @$symb : ($symb))
+ : ();
+
+ $self->_assert_states(@states);
+
+ my %succ;
+ for my $s (@states) {
+ $succ{$_}++
+ for grep { my $t = $self->{TRANS}[$s][$_];
+ defined $t && (@symbs ? $t->does(@symbs) : 1) } $self->get_states;
+ }
+
+ return keys %succ;
+}
+
+sub predecessors {
+ my $self = shift;
+ $self->clone->reverse->successors(@_);
+}
+
+# reverse - no change from NFA
+sub reverse {
+ my $self = $_[0]->clone;
+ $self->_transpose;
+
+ my @start = $self->get_starting;
+ my @final = $self->get_accepting;
+
+ $self->unset_accepting( $self->get_states );
+ $self->unset_starting( $self->get_states );
+
+ $self->set_accepting( @start );
+ $self->set_starting( @final );
+
+ $self;
+}
+
+# get an array of all symbols
+sub alphabet {
+ my $self = shift;
+ grep length, keys %{ $self->{ALPHA} };
+}
+
+# give an array of symbols, return the symbols that
+# are in the alphabet
+#sub is_in_alphabet {
+# my $self = shift;
+# my $
+#}
+
+############
+sub prune {
+ my $self = shift;
+
+ my @queue = $self->get_starting;
+ my %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ @queue = grep { ! $seen{$_}++ } $self->successors(\@queue);
+ }
+
+ my @useless = grep { !$seen{$_} } $self->get_states;
+ $self->delete_states(@useless);
+
+ return @useless;
+}
+
+
+############
+
+use Storable 'dclone';
+sub clone {
+ dclone( $_[0] );
+}
+
+sub _transpose {
+ my $self = shift;
+ my $N = $self->num_states - 1;
+
+ $self->{TRANS} = [
+ map {
+ my $row = $_;
+ [ map { $_->[$row] } @{$self->{TRANS}} ]
+ } 0 .. $N
+ ];
+}
+
+# tests to see if set1 is a subset of set2
+sub array_is_subset {
+ my $self = shift;
+ my $set1 = shift;
+ my $set2 = shift;
+ my $ok = 1;
+ my %setcount = ();
+ foreach ($self->array_unique(@{$set1}),$self->array_unique(@{$set2})) {
+ $setcount{$_}++;
+ }
+ foreach ($self->array_unique(@{$set1})) {
+ if ($setcount{$_} != 2) {
+ $ok = 0;
+ last;
+ }
+ }
+ return $ok;
+}
+
+sub array_unique {
+ my $self = shift;
+ my %ret = ();
+ foreach (@_) {
+ $ret{$_}++;
+ }
+ return keys(%ret);
+}
+
+sub array_complement {
+ my $self = shift;
+ my $set1 = shift;
+ my $set2 = shift;
+ my @ret = ();
+ # convert set1 to a hash
+ my %set1hash = map {$_ => 1} @{$set1};
+ # iterate of set2 and test if $set1
+ foreach (@{$set2}) {
+ if (!defined $set1hash{$_}) {
+ push(@ret,$_);
+ }
+ }
+ ## Now do the same using $set2
+ # convert set2 to a hash
+ my %set2hash = map {$_ => 1} @{$set2};
+ # iterate of set1 and test if $set1
+ foreach (@{$set1}) {
+ if (!defined $set2hash{$_}) {
+ push(@ret,$_);
+ }
+ }
+ # now @ret contains all items in $set1 not in $set 2 and all
+ # items in $set2 not in $set1
+ return @ret;
+}
+
+# returns all items that 2 arrays have in common
+sub array_intersect {
+ my $self = shift;
+ my $set1 = shift;
+ my $set2 = shift;
+ my %setcount = ();
+ my @ret = ();
+ foreach ($self->array_unique(@{$set1})) {
+ $setcount{$_}++;
+ }
+ foreach ($self->array_unique(@{$set2})) {
+ $setcount{$_}++;
+ push(@ret,$_) if ($setcount{$_} > 1);
+ }
+ return @ret;
+}
+
+# given a set of symbols, returns only the valid ones
+sub get_valid_symbols {
+ my $self = shift;
+ my $symbols = shift;
+ return $self->array_intersect([$self->alphabet()],[@{$symbols}])
+}
+
+## add an FA's states & transitions to this FA (as disjoint union)
+sub _swallow {
+ my ($self, $other) = @_;
+ my $N1 = $self->num_states;
+ my $N2 = $other->num_states;
+
+ push @$_, (undef) x $N2
+ for @{ $self->{TRANS} };
+
+ push @{ $self->{TRANS} }, [ (undef) x $N1, @{ clone $_ } ]
+ for @{ $other->{TRANS} };
+
+ push @{ $self->{STATES} }, @{ clone $other->{STATES} };
+
+ $self->{ALPHA}{$_} += $other->{ALPHA}{$_}
+ for keys %{ $other->{ALPHA} };
+
+ return map { $_ + $N1 } $other->get_states;
+}
+
+1;
+
+__END__
+
+
+=head2 Manipulation & Inspection Of States
+
+=over
+
+=item $fa-E<gt>get_states
+
+Returns a list of all the state "names" in $fa.
+
+=item $fa-E<gt>num_states
+
+Returns the number of states in $fa.
+
+=item $fa-E<gt>is_state($state_id)
+
+Returns a boolean indicating whether $state_id is a recognized state "name."
+
+=item $fa-E<gt>delete_states(@states)
+
+Deletes the states given in @states and their corresponding transitions. The
+remaining states in the FA may be "renamed" (renumbered)! Return value not
+used.
+
+=item $fa-E<gt>add_states($num)
+
+Adds $num states to $fa, and returns a list of the new state "names."
+
+=item $fa-E<gt>get_starting
+
+=item $fa-E<gt>get_accepting
+
+Returns a list of all the states which are labeled as starting/accepting,
+respectively.
+
+=item $fa-E<gt>set_accepting(@states)
+
+=item $fa-E<gt>unset_accepting(@states)
+
+=item $fa-E<gt>set_starting(@states)
+
+=item $fa-E<gt>unset_starting(@states)
+
+Sets/unsets a list of states as being labeled starting/accepting,
+respectively.
+
+=item $fa-E<gt>is_starting($state)
+
+=item $fa-E<gt>is_accepting($state)
+
+Returns a boolean indicating whether $state is labeled as starting/accepting,
+respectively.
+
+=item $fa-E<gt>prune
+
+Deletes the states which are not reachable (via zero or more transitions)
+from starting states. Returns a list of the "names" of states that were
+deleted.
+
+=back
+
+=head2 Manipulation & Inspection Of Transitions
+
+Each transition between states is a transition object, which knows how
+to organize several "labels." Think of this as the mechanism by which
+multiple arrows in the state diagram between the same states are collapsed
+to a single arrow. This interface is abstracted away into the following
+public methods:
+
+=over
+
+=item $fa-E<gt>set_transition($state1, $state2, @labels)
+
+Resets the transition between $state1 and $state2 to a transition
+initialized using data @labels. If @labels is omitted or contains
+only undefined elements, then the call is equivalent to C<remove_transition>.
+
+=item $fa-E<gt>add_transition($state1, $state2, @labels)
+
+Adds @labels to the transition between $state1 and $state2.
+
+=item $fa-E<gt>get_transition($state1, $state2)
+
+Returns the transition object stored between $state1 and $state2, or
+undef if there is no transition.
+
+=item $fa-E<gt>remove_transition($state1, $state2)
+
+Removes the transition object between $state1 and $state2.
+
+=item $fa-E<gt>successors(\@states)
+
+=item $fa-E<gt>successors($state)
+
+=item $fa-E<gt>successors(\@states, $label)
+
+=item $fa-E<gt>successors($state, $label)
+
+=item $fa-E<gt>successors(\@states, \@labels)
+
+=item $fa-E<gt>successors($state, \@labels)
+
+Given a state/set of states, and one or more labels, returns a list of
+the states (without duplicates) reachable from the states via a single
+transition having any of the given labels. If no labels are given, returns
+the states reachable by any (single) transition.
+
+Note that this method makes no distinction for epsilon transitions, these
+are only special in FLAT::NFA objects.
+
+=item $fa-E<gt>alphabet
+
+Returns the list of characters (without duplicates) used among all
+transition labels in the automaton.
+
+=back
+
+=head2 Conversions To External Formats
+
+=over
+
+=item $fa-E<gt>as_graphviz
+
+Returns a string containing a GraphViz (dot) description of the automaton,
+suitable for rendering with your favorite GraphViz layout engine.
+
+=item $fa-E<gt>as_summary
+
+Returns a string containing a plaintext description of the automaton,
+suitable for debugging purposes.
+
+=back
+
+=head2 Miscellaneous
+
+=over
+
+=item $fa-E<gt>clone
+
+Returns an identical copy of $fa.
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/NFA.pm b/lib/FLAT/NFA.pm
new file mode 100644
index 0000000..344ea76
--- /dev/null
+++ b/lib/FLAT/NFA.pm
@@ -0,0 +1,509 @@
+package FLAT::NFA;
+
+use strict;
+use base 'FLAT::FA';
+
+use FLAT::Transition;
+
+=head1 NAME
+
+FLAT::NFA - Nondeterministic finite automata
+
+=head1 SYNOPSIS
+
+A FLAT::NFA object is a finite automata whose transitions are labeled
+either with characters or the empty string (epsilon).
+
+=cut
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new(@_);
+ $self->{TRANS_CLASS} = "FLAT::Transition";
+ return $self;
+}
+
+sub singleton {
+ my ($class, $char) = @_;
+ my $nfa = $class->new;
+
+ if (not defined $char) {
+ $nfa->add_states(1);
+ $nfa->set_starting(0);
+ } elsif ($char eq "") {
+ $nfa->add_states(1);
+ $nfa->set_starting(0);
+ $nfa->set_accepting(0);
+ } else {
+ $nfa->add_states(2);
+ $nfa->set_starting(0);
+ $nfa->set_accepting(1);
+ $nfa->set_transition(0, 1, $char);
+ }
+ return $nfa;
+}
+
+sub as_nfa { $_[0]->clone }
+
+sub union {
+ my @nfas = map { $_->as_nfa } @_;
+ my $result = $nfas[0]->clone;
+ $result->_swallow($_) for @nfas[1 .. $#nfas];
+ $result;
+}
+
+sub concat {
+ my @nfas = map { $_->as_nfa } @_;
+
+ my $result = $nfas[0]->clone;
+ my @newstate = ([ $result->get_states ]);
+ my @start = $result->get_starting;
+
+ for (1 .. $#nfas) {
+ push @newstate, [ $result->_swallow( $nfas[$_] ) ];
+ }
+
+ $result->unset_accepting($result->get_states);
+ $result->unset_starting($result->get_states);
+ $result->set_starting(@start);
+
+ for my $nfa_id (1 .. $#nfas) {
+ for my $s1 ($nfas[$nfa_id-1]->get_accepting) {
+ for my $s2 ($nfas[$nfa_id]->get_starting) {
+ $result->set_transition(
+ $newstate[$nfa_id-1][$s1],
+ $newstate[$nfa_id][$s2], "" );
+ }}
+ }
+
+ $result->set_accepting(
+ @{$newstate[-1]}[ $nfas[-1]->get_accepting ] );
+
+ $result;
+}
+
+sub kleene {
+ my $result = $_[0]->clone;
+
+ my ($newstart, $newfinal) = $result->add_states(2);
+
+ $result->set_transition($newstart, $_, "")
+ for $result->get_starting;
+ $result->unset_starting( $result->get_starting );
+ $result->set_starting($newstart);
+
+ $result->set_transition($_, $newfinal, "")
+ for $result->get_accepting;
+ $result->unset_accepting( $result->get_accepting );
+ $result->set_accepting($newfinal);
+
+ $result->set_transition($newstart, $newfinal, "");
+ $result->set_transition($newfinal, $newstart, "");
+
+ $result;
+}
+
+sub reverse {
+ my $self = $_[0]->clone;
+ $self->_transpose;
+
+ my @start = $self->get_starting;
+ my @final = $self->get_accepting;
+
+ $self->unset_accepting( $self->get_states );
+ $self->unset_starting( $self->get_states );
+
+ $self->set_accepting( @start );
+ $self->set_starting( @final );
+
+ $self;
+}
+
+###########
+
+sub is_empty {
+ my $self = shift;
+
+ my @queue = $self->get_starting;
+ my %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ return 0 if grep { $self->is_accepting($_) } @queue;
+ @queue = grep { !$seen{$_}++ } $self->successors(\@queue);
+ }
+ return 1;
+}
+
+sub is_finite {
+ my $self = shift;
+
+ my @alphabet = $self->alphabet;
+ return 1 if @alphabet == 0;
+
+ my @queue = $self->get_starting;
+ my %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ @queue = grep { !$seen{$_}++ } $self->successors(\@queue);
+ }
+
+ for my $s ( grep { $self->is_accepting($_) } keys %seen ) {
+ @queue = $self->epsilon_closure($s);
+ %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ my @next = $self->epsilon_closure(
+ $self->successors(\@queue, \@alphabet) );
+
+ return 0 if grep { $s eq $_ } @next;
+ @queue = grep { !$seen{$_}++ } @next;
+ }
+ }
+ return 1;
+}
+
+sub epsilon_closure {
+ my ($self, @states) = @_;
+ my %seen = map { $_ => 1 } @states;
+ my @queue = @states;
+
+ while (@queue) {
+ @queue = grep { ! $seen{$_}++ } $self->successors( \@queue, "" );
+ }
+
+ keys %seen;
+}
+
+
+sub contains {
+ my ($self, $string) = @_;
+
+ my @active = $self->epsilon_closure( $self->get_starting );
+ for my $char (split //, $string) {
+ return 0 if ! @active;
+ @active = $self->epsilon_closure( $self->successors(\@active, $char) );
+ }
+ return !! grep { $self->is_accepting($_) } @active;
+}
+
+sub trace {
+ my ($self, $string) = @_;
+
+ my @trace = ([ $self->epsilon_closure( $self->get_starting ) ]);
+
+ for my $char (split //, $string) {
+ push @trace,
+ [ $self->epsilon_closure( $self->successors($trace[-1], $char) ) ];
+ }
+ return @trace;
+}
+############
+
+sub _extend_alphabet {
+ my ($self, @alpha) = @_;
+
+ my %alpha = map { $_ => 1 } @alpha;
+ delete $alpha{$_} for $self->alphabet;
+
+ return if not keys %alpha;
+
+ my $trash = $self->add_states(1);
+ for my $state ($self->get_states) {
+ next if $state eq $trash;
+ for my $char (keys %alpha) {
+ $self->add_transition($state, $trash, $char);
+ }
+ }
+ $self->add_transition($trash, $trash, $self->alphabet);
+}
+
+######## transformations
+
+# subset construction
+sub as_dfa {
+ my $self = shift;
+
+ my $result = FLAT::DFA->new;
+ my %subset;
+
+ my %final = map { $_ => 1 } $self->get_accepting;
+ my @start = sort { $a <=> $b } $self->epsilon_closure( $self->get_starting );
+
+ my $start = $subset{ _SET_ID(@start) } = $result->add_states(1);
+ $result->set_starting($start);
+
+ $result->set_accepting( $start )
+ if grep $_, @final{@start};
+
+ my @queue = (\@start);
+ while (@queue) {
+ my @states = @{ shift @queue };
+ my $S = $subset{ _SET_ID(@states) };
+
+ for my $symb ($self->alphabet) {
+ my @to = $self->epsilon_closure(
+ $self->successors(\@states, $symb) );
+
+ if ( not exists $subset{_SET_ID(@to)} ) {
+ push @queue, \@to;
+ my $T = $subset{_SET_ID(@to)} = $result->add_states(1);
+ $result->set_accepting($T)
+ if grep $_, @final{@to};
+ }
+
+ $result->add_transition($S, $subset{ _SET_ID(@to) }, $symb);
+ }
+ }
+
+ $result;
+}
+
+############ Formatted output
+
+# Format that Dr. Sukhamay KUNDU likes to use in his assignments :)
+# This format is just a undirected graph - so transition and state info is lost
+
+sub as_undirected {
+ my $self = shift;
+ my @symbols = $self->alphabet();
+ my @states = $self->get_states();
+ my %edges = ();
+ foreach (@states) {
+ my $s = $_;
+ foreach (@symbols) {
+ my $a = $_;
+ # foreach state, get all nodes connected to it; ignore symbols and
+ # treat transitions simply as directed
+ push(@{$edges{$s}},$self->successors($s,$a));
+ foreach ($self->successors($s,$a)) {
+ push(@{$edges{$_}},$s);
+ }
+ }
+ }
+ my @lines = (($#states+1));
+ foreach (sort{$a <=> $b;}(keys(%edges))) { #<-- iterate over numerically sorted list of keys
+ @{$edges{$_}} = sort {$a <=> $b;} $self->array_unique(@{$edges{$_}}); #<- make items unique and sort numerically
+ push(@lines,sprintf("%s(%s):%s",$_,($#{$edges{$_}}+1),join(' ',@{$edges{$_}})));
+ }
+ return join("\n",@lines);
+ }
+
+# Format that Dr. Sukhamay KUNDU likes to use in his assignments :)
+# This format is just a directed graph - so transition and state info is lost
+
+sub as_digraph {
+ my $self = shift;
+ my @symbols = $self->alphabet();
+ my @states = $self->get_states();
+ my @lines = ();
+ foreach (@states) {
+ my $s = $_;
+ my @edges = ();
+ foreach (@symbols) {
+ my $a = $_;
+ # foreach state, get all nodes connected to it; ignore symbols and
+ # treat transitions simply as directed
+ push(@edges,$self->successors($s,$a));
+ }
+ @edges = sort {$a <=> $b;} $self->array_unique(@edges); #<- make items unique and sort numerically
+ push(@lines,sprintf("%s(%s): %s",$s,($#edges+1),join(' ',@edges)));
+ }
+ return sprintf("%s\n%s",($#states+1),join("\n",@lines));
+}
+
+
+# Graph Description Language, aiSee, etc
+sub as_gdl {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{node: { title:"%s" shape:circle borderstyle: %s}\n},
+ $_,
+ ($self->is_accepting($_) ? "double bordercolor: red" : "solid")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[edge: { source: "%s" target: "%s" label: "%s" arrowstyle: line }\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "graph: {\ndisplay_edge_labels: yes\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+
+# Graphviz: dot, etc
+## digraph, directed
+sub as_graphviz {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{%s [label="%s",shape=%s]\n},
+ $_,
+ ($self->is_starting($_) ? "start ($_)" : "$_"),
+ ($self->is_accepting($_) ? "doublecircle" : "circle")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[%s -> %s [label="%s"]\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "digraph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+## undirected
+sub as_undirected_graphviz {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{%s [label="%s",shape=%s]\n},
+ $_,
+ ("$_"),
+ ("circle")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[%s -- %s\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "graph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+
+sub _SET_ID { return join "\0", sort { $a <=> $b } @_; }
+
+sub as_summary {
+ my $self = shift;
+ my $out = '';
+ $out .= sprintf ("States : ");
+ my @start;
+ my @final;
+ foreach ($self->get_states()) {
+ $out .= sprintf "'$_' ";
+ if ($self->is_starting($_)) {
+ push(@start,$_);
+ }
+ if ($self->is_accepting($_)) {
+ push(@final,$_);
+ }
+ }
+ $out .= sprintf ("\nStart State : '%s'\n",join('',@start));
+ $out .= sprintf ("Final State(s) : ");
+ foreach (@final) {
+ $out .= sprintf "'$_' ";
+ }
+ $out .= sprintf ("\nAlphabet : ");
+ foreach ($self->alphabet()) {
+ $out .= sprintf "'$_' ";
+ }
+ $out .= sprintf ("\nTransitions :\n");
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+ if (defined $t) {
+ push @trans, sprintf qq[%s -> %s on "%s"\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+ $out .= join('',@trans);
+ return $out;
+}
+
+1;
+
+__END__
+
+=head1 USAGE
+
+In addition to implementing the interface specified in L<FLAT>, FLAT::NFA
+objects provide the following NFA-specific methods:
+
+=over
+
+=item $nfa-E<gt>epsilon_closure(@states)
+
+Returns the set of states (without duplicates) which are reachable from
+@states via zero or more epsilon-labeled transitions.
+
+=item $nfa-E<gt>trace($string)
+
+Returns a list of N+1 arrayrefs, where N is the length of $string. The
+I-th arrayref contains the states which are reachable from the starting
+state(s) of $nfa after reading I characters of $string. Correctly accounts
+for epsilon transitions.
+
+=item $nfa-E<gt>as_undirected
+
+Outputs FA in a format that may be easily read into an external program as
+a description of an undirected graph.
+
+=item $nfa-E<gt>as_digraph
+
+Outputs FA in a format that may be easily read into an external program as
+a description of an directed graph.
+
+=item $nfa-E<gt>as_gdl
+
+Outputs FA in Graph Description Language (GDL), including directed transitions
+with symbols and state names labeled.
+
+=item $nfa-E<gt>as_graphviz
+
+Outputs FA in Graphviz format, including directed transitions with symbols and
+and state names labeled. This output may be directly piped into any of the
+Graphviz layout programs, and in turn one may output an image using a single
+commandline instruction. C<fash> uses this function to implement its "nfa2gv"
+command:
+
+ fash nfa2gv "a*b" | dot -Tpng > nfa.png
+
+=item $nfa-E<gt>as_undirected_graphviz
+
+Outputs FA in Graphviz format, with out the directed transitions or labels.
+The output is suitable for any of the Graphvize layout programs, as discussed
+above.
+
+=item $nfa-E<gt>as_summary
+
+Outputs a summary of the FA, including its states, symbols, and transition matrix.
+It is useful for manually validating what the FA looks like.
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/PFA.pm b/lib/FLAT/PFA.pm
new file mode 100644
index 0000000..5557525
--- /dev/null
+++ b/lib/FLAT/PFA.pm
@@ -0,0 +1,293 @@
+package FLAT::PFA;
+use strict;
+use base 'FLAT::NFA';
+use Carp;
+
+use FLAT::Transition;
+
+use constant LAMBDA => '#lambda';
+
+# Note: in a PFA, states are made up of active nodes. In this implementation, we have
+# decided to retain the functionality of the state functions in FA.pm, although the entities
+# being manipulated are technically nodes, not states. States are only explicitly tracked
+# once the PFA is serialized into an NFA. Therefore, the TRANS member of the PFA object is
+# the nodal transition function, gamma. The state transition function, delta, is not used
+# in anyway, but is derived out of the PFA->NFA conversion process.
+
+
+# The new way of doing things eliminated from PFA.pm of FLAT::Legacy is the
+# need to explicitly track: start nodes, final nodes, symbols, and lambda & epsilon symbols,
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new(@_); # <-- SUPER is FLAT::NFA
+ return $self;
+}
+
+# Singleton is no different than the NFA singleton
+sub singleton {
+ my ($class, $char) = @_;
+ my $pfa = $class->new;
+ if (not defined $char) {
+ $pfa->add_states(1);
+ $pfa->set_starting(0);
+ } elsif ($char eq "") {
+ $pfa->add_states(1);
+ $pfa->set_starting(0);
+ $pfa->set_accepting(0);
+ } else {
+ $pfa->add_states(2);
+ $pfa->set_starting(0);
+ $pfa->set_accepting(1);
+ $pfa->set_transition(0, 1, $char);
+ }
+ return $pfa;
+}
+
+# attack of the clones
+sub as_pfa { $_[0]->clone() }
+
+sub set_starting {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{starting} = 1 for @states;
+}
+
+# Creates a single start state with epsilon transitions from
+# the former start states;
+# Creates a single final state with epsilon transitions from
+# the former accepting states
+sub pinch {
+ my $self = shift;
+ my $symbol = shift;
+ my @starting = $self->get_starting;
+ if (@starting > 1) {
+ my $newstart = $self->add_states(1);
+ map {$self->add_transition($newstart,$_,$symbol)} @starting;
+ $self->unset_starting(@starting);
+ $self->set_starting($newstart);
+ }
+ #
+ my @accepting = $self->get_accepting;
+ if (@accepting > 1) {
+ my $newfinal = $self->add_states(1);
+ map {$self->add_transition($_,$newfinal,$symbol)} @accepting;
+ $self->unset_accepting(@accepting);
+ $self->set_accepting($newfinal);
+ }
+ return;
+}
+
+# Implement the joining of two PFAs with lambda transitions
+# Note: using epsilon pinches for simplicity
+sub shuffle {
+ my @pfas = map { $_->as_pfa } @_;
+ my $result = $pfas[0]->clone;
+ $result->_swallow($_) for @pfas[1 .. $#pfas];
+ $result->pinch(LAMBDA);
+ $result;
+}
+
+##############
+
+sub is_tied {
+ my ($self, $state) = @_;
+ $self->_assert_states($state);
+ return $self->{STATES}[$state]{tied};
+}
+
+sub set_tied {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{tied} = 1 for @states;
+}
+
+sub unset_tied {
+ my ($self, @states) = @_;
+ $self->_assert_states(@states);
+ $self->{STATES}[$_]{tied} = 0 for @states;
+}
+
+sub get_tied {
+ my $self = shift;
+ return grep { $self->is_tied($_) } $self->get_states;
+}
+
+##############
+
+# joins two PFAs in a union (or) - no change from NFA
+sub union {
+ my @pfas = map { $_->as_pfa } @_;
+ my $result = $pfas[0]->clone;
+ $result->_swallow($_) for @pfas[1 .. $#pfas];
+ $result->pinch('');
+ $result;
+}
+
+# joins two PFAs via concatenation - no change from NFA
+sub concat {
+ my @pfas = map { $_->as_pfa } @_;
+
+ my $result = $pfas[0]->clone;
+ my @newstate = ([ $result->get_states ]);
+ my @start = $result->get_starting;
+
+ for (1 .. $#pfas) {
+ push @newstate, [ $result->_swallow( $pfas[$_] ) ];
+ }
+
+ $result->unset_accepting($result->get_states);
+ $result->unset_starting($result->get_states);
+ $result->set_starting(@start);
+
+ for my $pfa_id (1 .. $#pfas) {
+ for my $s1 ($pfas[$pfa_id-1]->get_accepting) {
+ for my $s2 ($pfas[$pfa_id]->get_starting) {
+ $result->set_transition(
+ $newstate[$pfa_id-1][$s1],
+ $newstate[$pfa_id][$s2], "" );
+ }}
+ }
+
+ $result->set_accepting(
+ @{$newstate[-1]}[ $pfas[-1]->get_accepting ] );
+
+ $result;
+}
+
+# forms closure around a the given PFA - no change from NFA
+sub kleene {
+ my $result = $_[0]->clone;
+
+ my ($newstart, $newfinal) = $result->add_states(2);
+
+ $result->set_transition($newstart, $_, "")
+ for $result->get_starting;
+ $result->unset_starting( $result->get_starting );
+ $result->set_starting($newstart);
+
+ $result->set_transition($_, $newfinal, "")
+ for $result->get_accepting;
+ $result->unset_accepting( $result->get_accepting );
+ $result->set_accepting($newfinal);
+
+ $result->set_transition($newstart, $newfinal, "");
+ $result->set_transition($newfinal, $newstart, "");
+
+ $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ my $result = FLAT::NFA->new();
+ # Dstates is initially populated with the start state, which
+ # is exactly the set of all nodes marked as a starting node
+ my @Dstates = [sort($self->get_starting())]; # I suppose all start states are considered 'tied'
+ my %DONE = (); # |- what about all accepting states? I think so...
+ # the main while loop that ends when @Dstates becomes exhausted
+ my %NEW = ();
+ while (@Dstates) {
+ my $current = pop(@Dstates);
+ my $currentid = join(',',@{$current});
+ $DONE{$currentid}++; # mark done
+ foreach my $symbol ($self->alphabet(),'') { # Sigma UNION epsilon
+ if (LAMBDA eq $symbol) {
+ my @NEXT = ();
+ my @tmp = $self->successors([@{$current}],$symbol);
+ if (@tmp) {
+ my @pred = $self->predecessors([@tmp],LAMBDA);
+ if ($self->array_is_subset([@pred],[@{$current}])) {
+ push(@NEXT,@tmp,$self->array_complement([@{$current}],[@pred]));
+ @NEXT = sort($self->array_unique(@NEXT));
+ my $nextid = join(',',@NEXT);
+ push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid}));
+ # make new states if none exist and track
+ if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)};
+ if (!exists($NEW{$nextid})) {$NEW{$nextid} = $result->add_states(1) };
+ $result->add_transition($NEW{$currentid},$NEW{$nextid},'');
+ }
+ }
+ } else {
+ foreach my $node (@{$current}) {
+ my @tmp = $self->successors([$node],$symbol);
+ foreach my $new (@tmp) {
+ my @NEXT = ();
+ push(@NEXT,$new,$self->array_complement([@{$current}],[$node]));
+ @NEXT = sort($self->array_unique(@NEXT));
+ my $nextid = join(',',@NEXT);
+ push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid}));
+ # make new states if none exist and track
+ if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)};
+ if (!exists($NEW{$nextid})) {$NEW{$nextid} = $result->add_states(1) };
+ $result->add_transition($NEW{$currentid},$NEW{$nextid},$symbol);
+ }
+ }
+ }
+ }
+ }
+ $result->set_starting($NEW{join(",",sort $self->get_starting())});
+ $result->set_accepting($NEW{join(",",sort $self->get_accepting())});
+ return $result;
+ }
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::PFA - Parallel finite automata
+
+=head1 SYNOPSIS
+
+A FLAT::PFA object is a finite automata whose transitions are labeled either
+with characters, the empty string (epsilon), or a concurrent line of execution
+(lambda). It essentially models two FSA in a non-deterministic way such that
+a string is valid it puts the FSA of the shuffled languages both into a final,
+or accepting, state. A PFA is an NFA, and as such exactly describes a regular
+language.
+
+A PFA contains nodes and states. A state is made up of whatever nodes happen
+to be active. There are two transition functions, nodal transitions and state
+transitions. When a PFA is converted into a NFA, there is no longer a need for
+nodes or nodal transitions, so they go are eliminated. PFA model state spaces
+much more compactly than NFA, and an N state PFA may represent 2**N non-deterministic
+states. This also means that a PFA may represent up to 2^(2^N) deterministic states.
+
+=head1 USAGE
+
+(not implemented yet)
+
+In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>,
+FLAT::PFA objects provide the following PFA-specific methods:
+
+=over
+
+=item $pfa-E<gt>shuffle
+
+Shuffle construct for building a PFA out of a PRE (i.e., a regular expression with
+the shuffle operator)
+
+=item $pfa-E<gt>as_nfa
+
+Converts a PFA to an NFA by enumerating all states; similar to the Subset Construction
+Algorithm, it does not implement e-closure. Instead it treats epsilon transitions
+normally, and joins any states resulting from a lambda (concurrent) transition
+using an epsilon transition.
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/Regex.pm b/lib/FLAT/Regex.pm
new file mode 100644
index 0000000..2c5c243
--- /dev/null
+++ b/lib/FLAT/Regex.pm
@@ -0,0 +1,194 @@
+package FLAT::Regex;
+use base 'FLAT';
+use strict;
+use Carp;
+
+use FLAT::Regex::Parser;
+use FLAT::Regex::Op;
+
+my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star ]);
+#### TODO: error checking in the parse
+
+sub _parser { $PARSER }
+
+sub new {
+ my ($pkg, $string) = @_;
+ my $result = $pkg->_parser->parse($string)
+ or croak qq[``$string'' is not a valid regular expression];
+
+ $pkg->_from_op( $result );
+}
+
+sub _from_op {
+ my ($proto, $op) = @_;
+ $proto = ref $proto || $proto; ## I really do want this
+
+ bless [ $op ], $proto;
+}
+
+sub op {
+ $_[0][0];
+}
+
+use overload '""' => 'as_string';
+sub as_string {
+ $_[0]->op->as_string(0);
+}
+
+sub as_perl_regex {
+ my ($self, %opts) = @_;
+
+ my $fmt = $opts{anchored} ? '(?:\A%s\z)' : '(?:%s)';
+ return sprintf $fmt, $self->op->as_perl_regex(0);
+}
+
+sub contains {
+ my ($self, $string) = @_;
+ $string =~ $self->as_perl_regex(anchored => 1);
+}
+
+sub as_nfa {
+ $_[0]->op->as_nfa;
+}
+
+sub as_pfa {
+ $_[0]->op->as_pfa;
+}
+
+#### regular language standard interface implementation:
+#### TODO: parameter checking?
+
+sub as_regex {
+ $_[0];
+}
+
+sub union {
+ my $self = $_[0];
+ my $op = FLAT::Regex::op::alt->new( map { $_->as_regex->op } @_ );
+ $self->_from_op($op);
+}
+
+sub intersect {
+ my @dfas = map { $_->as_dfa } @_;
+ my $self = shift @dfas;
+ $self->intersect(@dfas)->as_regex;
+}
+
+sub complement {
+ my $self = shift;
+ $self->as_dfa->complement->as_regex;
+}
+
+sub concat {
+ my $self = $_[0];
+ my $op = FLAT::Regex::op::concat->new( map { $_->as_regex->op } @_ );
+ $self->_from_op($op);
+}
+
+sub kleene {
+ my $self = shift;
+ my $op = FLAT::Regex::op::star->new( $self->op );
+ $self->_from_op($op);
+}
+
+sub reverse {
+ my $self = shift;
+ my $op = $self->op->reverse;
+ $self->_from_op($op);
+}
+
+sub is_empty {
+ $_[0]->op->is_empty;
+}
+
+sub is_finite {
+ $_[0]->op->is_finite;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::Regex - Regular expressions
+
+=head1 SYNOPSIS
+
+A FLAT::Regex object is a regular expression.
+
+=head1 USAGE
+
+In addition to implementing the interface specified in L<FLAT>, FLAT::Regex
+objects provide the following regex-specific methods:
+
+=over
+
+=item FLAT::Regex-E<gt>new($string)
+
+Returns a regex object representing the expression given in $string. C<|>
+and C<+> can both be used to denote alternation. C<*> denotes Kleene star, and
+parentheses can be used for grouping. No other features or shortcut notation
+is currently supported (character classes, {n,m} repetition, etc).
+
+Whitespaces is ignored. To specify a literal space, use C<[ ]>. This syntax
+can also be used to specify atomic "characters" longer than a single
+character. For example, the expression:
+
+ [foo]abc[bar]*
+
+is treated as a regular expression over the symbols "a", "b", "c", "foo",
+and "bar". In particular, this means that when the regular expression is
+reversed, "foo" and "bar" remain the same (i.e, they do not become "oof" and
+"rab").
+
+The empty regular expression (epsilon) is written as C<[]>, and the null
+regular expression (sometimes called phi) is specified with the C<#>
+character. To specify a literal hash-character, use C<[#]>. Including
+literal square bracket characters is currently not supported.
+
+The expression "" (or any string containing only whitespace) is not a valid
+FLAT regex expression. Either C<[]> or C<#> are probably what was intended.
+
+=item $regex-E<gt>as_string
+
+Returns the string representation of the regex, in the same format as above.
+It is NOT necessarily true that
+
+ FLAT::Regex->new($string)->as_string
+
+is identical to $string, especially if $string contains whitespace or
+redundant parentheses.
+
+=item $regex-E<gt>as_perl_regex
+
+=item $regex-E<gt>as_perl_regex(anchored => $bool);
+
+Returns an equivalent Perl regular expression. If the "anchored" option
+is set to a true value, the regular expression will be anchored with
+C<\A> and C<\z>. The default behavior is to omit the anchors.
+
+The Perl regex will not contain capturing parentheses. "Extended" characters
+that are written as "[char]" in FLAT regexes will be written without the
+square brackets in the corresponding Perl regex. So the following:
+
+ FLAT::Regex->new("[foo][bar]*")->as_perl_regex
+
+will be equal to "(?:foo(?:bar)*)".
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/Regex/Op.pm b/lib/FLAT/Regex/Op.pm
new file mode 100644
index 0000000..76e796c
--- /dev/null
+++ b/lib/FLAT/Regex/Op.pm
@@ -0,0 +1,282 @@
+package FLAT::Regex::Op;
+use strict;
+
+sub new {
+ my $pkg = shift;
+ ## flatten alike operations, i.e, "a+(b+c)" into "a+b+c"
+ my @flat = map { UNIVERSAL::isa($_, $pkg) ? $_->members : $_ } @_;
+
+ bless \@flat, $pkg;
+}
+
+sub members {
+ my $self = shift;
+ wantarray ? @$self[0 .. $#$self] : $self->[0];
+}
+
+
+#################################
+#### regex operators / components
+
+package FLAT::Regex::Op::atomic;
+use base 'FLAT::Regex::Op';
+
+sub as_string {
+ my $t = $_[0]->members;
+
+ return "#" if not defined $t;
+ return $t =~ /^\w$/
+ ? $t
+ : "[$t]";
+}
+
+sub as_perl_regex {
+ my $r = $_[0]->members;
+
+ return "(?!)" if not defined $r;
+
+ $r = quotemeta $r;
+ return $r =~ /^\w$/ ? $r : "(?:$r)";
+}
+
+sub as_nfa {
+ FLAT::NFA->singleton( $_[0]->members );
+}
+
+sub as_pfa {
+ FLAT::PFA->singleton( $_[0]->members );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ my $i = $item[1];
+
+ return $pkg->new("") if $i eq "[]";
+ return $pkg->new(undef) if $i eq "#";
+
+ $i =~ s/^\[|\]$//g;
+
+ return $pkg->new($i);
+}
+
+sub reverse {
+ $_[0];
+}
+
+sub is_empty {
+ not defined $_[0]->members;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ defined $self->members and length $self->members;
+}
+
+sub is_finite {
+ 1
+}
+
+##############################
+package FLAT::Regex::Op::star;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s '*'" }
+sub precedence { 30 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = $self->members->as_string($self->precedence) . "*";
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = $self->members->as_perl_regex($self->precedence) . "*";
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ $self->members->as_nfa->kleene;
+}
+
+sub as_pfa {
+ my $self = shift;
+ $self->members->as_pfa->kleene;
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( $item[1] );
+}
+
+sub reverse {
+ my $self = shift;
+ my $op = $self->members->reverse;
+ __PACKAGE__->new($op);
+}
+
+sub is_empty {
+ 0
+}
+
+sub has_nonempty_string {
+ $_[0]->members->has_nonempty_string;
+}
+
+sub is_finite {
+ ! $_[0]->members->has_nonempty_string;
+}
+
+
+################################
+package FLAT::Regex::Op::concat;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s(2..)"; }
+sub precedence { 20 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = join "",
+ map { $_->as_perl_regex($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ my @parts = map { $_->as_nfa } $self->members;
+ $parts[0]->concat( @parts[1..$#parts] );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->concat( @parts[1..$#parts] );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+## note: "reverse" conflicts with perl builtin
+sub reverse {
+ my $self = shift;
+ my @ops = CORE::reverse map { $_->reverse } $self->members;
+ __PACKAGE__->new(@ops);
+}
+
+sub is_empty {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->is_empty;
+ }
+ return 0;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ return 0 if $self->is_empty;
+
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->has_nonempty_string;
+ }
+ return 0;
+}
+
+sub is_finite {
+ my $self = shift;
+ return 1 if $self->is_empty;
+
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_finite;
+ }
+ return 1;
+}
+
+#############################
+package FLAT::Regex::Op::alt;
+use base 'FLAT::Regex::Op';
+
+sub parse_spec { "%s(2.. /[+|]/)" }
+sub precedence { 10 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "+",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my ($self, $prec) = @_;
+ my $result = join "|",
+ map { $_->as_perl_regex($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "(?:$result)" : $result;
+}
+
+sub as_nfa {
+ my $self = shift;
+ my @parts = map { $_->as_nfa } $self->members;
+ $parts[0]->union( @parts[1..$#parts] );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->union( @parts[1..$#parts] );
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+sub reverse {
+ my $self = shift;
+ my @ops = map { $_->reverse } $self->members;
+ __PACKAGE__->new(@ops);
+}
+
+sub is_empty {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_empty;
+ }
+ return 1;
+}
+
+sub has_nonempty_string {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 1 if $_->has_nonempty_string;
+ }
+ return 0;
+}
+
+sub is_finite {
+ my $self = shift;
+ my @members = $self->members;
+ for (@members) {
+ return 0 if not $_->is_finite;
+ }
+ return 1;
+}
+1;
diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm
new file mode 100644
index 0000000..deb73f1
--- /dev/null
+++ b/lib/FLAT/Regex/Parser.pm
@@ -0,0 +1,82 @@
+package FLAT::Regex::Parser;
+use strict;
+
+#### Is this one level of abstraction too far? Parser generator generators..
+
+#### TODO: try YAPP, since recursive descent is SLOOOW
+use Parse::RecDescent;
+use FLAT::Regex::Op;
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x;
+
+sub new {
+ my $pkg = shift;
+ my @ops = sort { $a->{prec} <=> $b->{prec} }
+ map {{
+ pkg => "FLAT::Regex::Op::$_",
+ prec => "FLAT::Regex::Op::$_"->precedence,
+ spec => "FLAT::Regex::Op::$_"->parse_spec,
+ short => $_
+ }} @_;
+
+ my $lowest = shift @ops;
+ my $grammar = qq!
+ parse:
+ $lowest->{short} /^\\Z/ { \$item[1] }
+ !;
+
+ my $prev = $lowest;
+ for (@ops) {
+ my $spec = sprintf $prev->{spec}, $_->{short};
+
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | $_->{short} { \$item[1] }
+ !;
+
+ $prev = $_;
+ }
+
+ my $spec = sprintf $prev->{spec}, "atomic";
+ $grammar .= qq!
+ $prev->{short}:
+ $spec { $prev->{pkg}\->from_parse(\@item) }
+ | atomic { \$item[1] }
+
+ atomic:
+ "(" $lowest->{short} ")" { \$item[2] }
+ | /\$FLAT::Regex::Parser::CHAR/
+ { FLAT::Regex::Op::atomic->from_parse(\@item) }
+ !;
+
+ Parse::RecDescent->new($grammar);
+}
+
+1;
+
+
+__END__
+
+original parser:
+
+use vars '$CHAR';
+$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x;
+
+my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die;
+ parse:
+ alt /^\Z/ { $item[1] }
+ alt:
+ concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) }
+ | concat { $item[1] }
+ concat:
+ star(2..) { FLAT::Regex::Op::concat->from_parse(@item) }
+ | star { $item[1] }
+ star :
+ atomic '*' { FLAT::Regex::Op::star->from_parse(@item) }
+ | atomic { $item[1] }
+ atomic:
+ "(" alt ")" { $item[2] }
+ | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) }
+__EOG__
diff --git a/lib/FLAT/Regex/Transform.pm b/lib/FLAT/Regex/Transform.pm
new file mode 100644
index 0000000..cd0cf56
--- /dev/null
+++ b/lib/FLAT/Regex/Transform.pm
@@ -0,0 +1,18 @@
+package FLAT::Regex::Transform;
+
+# Extends FLAT::Regex::WithExtraOps with PRegex transformations
+# (i.e., reductions based on: w*v & a*b
+
+use base 'FLAT::Regex::WithExtraOps';
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new(@_);
+ return $self;
+}
+
+# Ideally, the transformation should be implemented as an iterator. This
+# approach will be finite for shuffles with NO closed strings, but will carry on
+# indefinitely for the shuffle of strings where at least one of the strings is closed
+
+1;
diff --git a/lib/FLAT/Regex/Util.pm b/lib/FLAT/Regex/Util.pm
new file mode 100644
index 0000000..516ad9f
--- /dev/null
+++ b/lib/FLAT/Regex/Util.pm
@@ -0,0 +1,33 @@
+package FLAT::Regex::Util;
+use base 'FLAT::Regex';
+
+use strict;
+use Carp;
+
+sub get_symbol {
+ my @symbols = qw/0 1/;
+ return $symbols[rand(2)];
+}
+
+sub get_op {
+ my @ops = ('*','+','&','','','','','','','');
+ return $ops[rand(10)];
+}
+
+sub get_random {
+ my $length = shift;
+ my $string = '';
+ if (1 < $length) {
+ $string = get_symbol().get_op().get_random(--$length);
+ } else {
+ $string = get_symbol();
+ }
+ return $string;
+}
+
+sub random_pre {
+ my $length = ( $_[0] ? $_[0] : 32 );
+ return FLAT::Regex::WithExtraOps->new(get_random($length));
+}
+
+1;
diff --git a/lib/FLAT/Regex/WithExtraOps.pm b/lib/FLAT/Regex/WithExtraOps.pm
new file mode 100644
index 0000000..b366d7c
--- /dev/null
+++ b/lib/FLAT/Regex/WithExtraOps.pm
@@ -0,0 +1,109 @@
+package FLAT::Regex::WithExtraOps;
+use base 'FLAT::Regex';
+
+use strict;
+use Carp;
+
+my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star negate shuffle ]);
+sub _parser { $PARSER }
+
+sub members {
+ my $self = shift;
+ wantarray ? @$self[0 .. $#$self] : $self->[0];
+}
+
+#### Precedence
+# 30 ::star
+# 20 ::concat
+# 15 ::negate <---<< WithExtraOps
+# 12 ::shuffle <---<< WithExtraOps
+# 10 ::alt
+# 0 ::atomic
+
+###############################
+package FLAT::Regex::Op::negate;
+use base "FLAT::Regex::Op";
+use Carp;
+
+sub parse_spec { "'~' %s"; }
+sub precedence { 15 } # between concat and alternation
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = "~" . $self->members->as_string($self->precedence);
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( $item[2] );
+}
+
+## note: "reverse" conflicts with perl builtin
+sub reverse {
+ my $self = shift;
+ my $op = $self->members->reverse;
+ __PACKAGE__->new($op);
+}
+
+sub is_empty {
+ croak "Not implemented for negated regexes";
+}
+
+sub has_nonempty_string {
+ croak "Not implemented for negated regexes";
+}
+
+sub is_finite {
+ croak "Not implemented for negated regexes";
+}
+
+###############################
+package FLAT::Regex::Op::shuffle;
+use base 'FLAT::Regex::Op';
+use Carp;
+
+sub parse_spec { "%s(2.. /[&]/)" }
+sub precedence { 12 }
+
+sub as_string {
+ my ($self, $prec) = @_;
+ my $result = join "&",
+ map { $_->as_string($self->precedence) }
+ $self->members;
+ return $prec > $self->precedence ? "($result)" : $result;
+}
+
+sub as_perl_regex {
+ my $self = shift;
+ croak "Not implemented for shuffled regexes";
+}
+
+sub from_parse {
+ my ($pkg, @item) = @_;
+ $pkg->new( @{ $item[1] } );
+}
+
+sub as_pfa {
+ my $self = shift;
+ my @parts = map { $_->as_pfa } $self->members;
+ $parts[0]->shuffle( @parts[1..$#parts] );
+}
+
+# Implement?
+sub reverse {
+ my $self = shift;
+ croak "Not implemented for shuffled regexes";
+}
+
+sub is_empty {
+ croak "Not implemented for shuffled regexes";
+}
+
+sub has_nonempty_string {
+ croak "Not implemented for shuffled regexes";
+}
+
+sub is_finite {
+ croak "Not implemented for shuffled regexes";
+}
diff --git a/lib/FLAT/Symbol.pm b/lib/FLAT/Symbol.pm
new file mode 100644
index 0000000..aaadccc
--- /dev/null
+++ b/lib/FLAT/Symbol.pm
@@ -0,0 +1,98 @@
+#
+# Conceptual Experiment - not currently implemented anywhere...
+#
+
+package FLAT::Symbol
+
+use strict;
+use Carp;
+
+sub new {
+ my ($pkg, $string, $type) = @_;
+ bless {
+ STRING => $string,
+ TYPE => $type,
+ }, $pkg;
+}
+
+sub as_string {
+ return $_[0]->{STRING};
+}
+
+sub get_type }
+ return $_[0]->{TYPE};
+}
+
+sub set_type {
+ $_[0]->{TYPE} = $_[1];
+}
+
+1;
+
+##################
+
+package FLAT::Symbol::Regular;
+use base 'FLAT::Symbol';
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new($_[0],'Regular');
+ return $self;
+}
+
+sub get_type {
+ return 'Regular';
+}
+
+sub set_type {
+ croak("Sorry, can't change type for this symbol");
+}
+
+1;
+
+##################
+
+package FLAT::Symbol::Special;
+use base 'FLAT::Symbol';
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new($_[0],'Special');
+ return $self;
+}
+
+sub get_type {
+ return 'Special';
+}
+
+sub set_type {
+ croak("Sorry, can't change type for this symbol");}
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::Symbol - Base class for transition symbol.
+
+=head1 SYNOPSIS
+
+A super class that is intended to provide a simple mechanism for storing a symbol that might be
+in conflict with another symbol in string form. TYPE is used to distinguish. Currenly this neither
+this, nor its current sub classes, FLAT::Symbol::Regular and FLAT::Symbol::Special, are used.
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/Transition.pm b/lib/FLAT/Transition.pm
new file mode 100644
index 0000000..fc385f3
--- /dev/null
+++ b/lib/FLAT/Transition.pm
@@ -0,0 +1,66 @@
+package FLAT::Transition;
+use strict;
+use Carp;
+
+sub new {
+ my ($pkg, @things) = @_;
+ bless { map { $_ => 1 } @things }, $pkg;
+}
+
+sub does {
+ my ($self, @things) = @_;
+ return 1 if @things == 0;
+ return !! grep $self->{$_}, @things;
+}
+
+sub add {
+ my ($self, @things) = @_;
+ @$self{@things} = (1) x @things;
+}
+
+sub delete {
+ my ($self, @things) = @_;
+ delete $self->{$_} for @things;
+}
+
+sub alphabet {
+ my $self = shift;
+ sort { $a cmp $b } keys %$self;
+}
+
+sub as_string {
+ my $self = shift;
+ join ",", map { length $_ ? $_ : "epsilon" } $self->alphabet;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+FLAT::Transition - a transition base class.
+
+=head1 SYNOPSIS
+
+Default implementation of the Transition class, used to manage transitions
+from one state to others. This class is meant for internal use.
+
+=head1 USAGE
+
+used internally;
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/FLAT/XFA.pm b/lib/FLAT/XFA.pm
new file mode 100644
index 0000000..67e073c
--- /dev/null
+++ b/lib/FLAT/XFA.pm
@@ -0,0 +1,3 @@
+package FLAT::XFA;
+
+1;