diff options
author | Daniel Friesel <derf@finalrewind.org> | 2017-04-03 15:04:15 +0200 |
---|---|---|
committer | Daniel Friesel <derf@finalrewind.org> | 2017-04-03 15:04:15 +0200 |
commit | 00e57331b1c7ef2b1f402f41e1223308e0d8ce61 (patch) | |
tree | 05e9b4223072582a5a6843de6d9845213a94f341 /lib/FLAT |
initial commit
Diffstat (limited to 'lib/FLAT')
-rw-r--r-- | lib/FLAT/CMD.pm | 533 | ||||
-rw-r--r-- | lib/FLAT/CMD/AcyclicStrings.pm | 54 | ||||
-rw-r--r-- | lib/FLAT/CMD/DFTStrings.pm | 55 | ||||
-rw-r--r-- | lib/FLAT/DFA.pm | 557 | ||||
-rw-r--r-- | lib/FLAT/FA.pm | 554 | ||||
-rw-r--r-- | lib/FLAT/NFA.pm | 509 | ||||
-rw-r--r-- | lib/FLAT/PFA.pm | 293 | ||||
-rw-r--r-- | lib/FLAT/Regex.pm | 194 | ||||
-rw-r--r-- | lib/FLAT/Regex/Op.pm | 282 | ||||
-rw-r--r-- | lib/FLAT/Regex/Parser.pm | 82 | ||||
-rw-r--r-- | lib/FLAT/Regex/Transform.pm | 18 | ||||
-rw-r--r-- | lib/FLAT/Regex/Util.pm | 33 | ||||
-rw-r--r-- | lib/FLAT/Regex/WithExtraOps.pm | 109 | ||||
-rw-r--r-- | lib/FLAT/Symbol.pm | 98 | ||||
-rw-r--r-- | lib/FLAT/Transition.pm | 66 | ||||
-rw-r--r-- | lib/FLAT/XFA.pm | 3 |
16 files changed, 3440 insertions, 0 deletions
diff --git a/lib/FLAT/CMD.pm b/lib/FLAT/CMD.pm new file mode 100644 index 0000000..5bd0e2d --- /dev/null +++ b/lib/FLAT/CMD.pm @@ -0,0 +1,533 @@ +package FLAT::CMD; +use FLAT; +use FLAT::Regex; +use FLAT::NFA; +use FLAT::DFA; +use Carp; + +=head1 NAME + +CMD - Commandline interface for the Formal Language & Automata Toolkit + +=head1 SYNOPSIS + +CMD.pm is provides an interface to the C<fash> commandline utility that offers +certain features implemented in FLAT. Consequently, this interface is also +available using the C<perl -MFLAT::CMD -e func> paradigm, but C<fash> makes +things a lot more convenient. + +=head1 USAGE + +All regular language objects in FLAT implement the following methods. +Specific regular language representations (regex, NFA, DFA) may implement +additional methods that are outlined in the repsective POD pages. + +=cut + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(compare dump dfa2gv nfa2gv pfa2gv dfa2undgv nfa2undgv pfa2undgv dfa2digraph + nfa2digraph pfa2digraph dfa2undirected nfa2undirected pfa2undirected random_pre random_re + savedfa test help + ); + +sub AUTOLOAD { + my($l) = $AUTOLOAD; + $l =~ s/.*:://; + my(%EXPORT); + @EXPORT{@EXPORT} = ''; + if (exists $EXPORT{$l}){ + FLAT::CMD->$l(@_); + } +} + +sub help { +print <<END +__________ .__ ___________.____ ___________ +\______ \ ___________| | \_ _____/| | _____\__ ___/ + | ___// __ \_ __ \ | | __) | | \__ \ | | + | | \ ___/| | \/ |__ | \ | |___ / __ \| | + |____| \___ >__| |____/ \___ / |_______ (____ /____| + \/ \/ \/ \/ + + Everything is wrt parallel regular expressions, i.e., + with the addtional shuffle operator, "&". All this + means is that you can use the ambersand (&) as a symbol + in the regular expressions you submit because it will be + detected as an operator.That said, if you avoid using + the "&" operator, you can forget about all that shuffle + business. + +%perl -MFLAT::CMD -e + "somestrings" 're1' # creates all valid strings via acyclic path, no cycles yet + "compare 're1','re2'" # comares 2 regexs | see note [2] + "dump 're1'" # dumps parse trees | see note[1] + "dfa2gv 're1'" # dumps graphviz digraph desc | see note[1] + "nfa2gv 're1'" # dumps graphviz digraph desc | see note[1] + "pfa2gv 're1'" # dumps graphviz digraph desc | see note[1] + "dfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1] + "nfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1] + "pfa2undgv 're1'" # dumps graphviz undirected graph desc | see note[1] + "dfa2digraph 're1'" # dumps directed graph without transitions + "nfa2digraph 're1'" # dumps directed graph without transitions + "pfa2digraph 're1'" # dumps directed graph without transitions + "dfa2undirected 're1'" # dumps undirected graph without transitions + "nfa2undirected 're1'" # dumps undirected graph without transitions + "pfa2undirected 're1'" # dumps undirected graph without transitions + random_pre + random_re + "savedfa 're1'" # converts PRE to min dfa, then serializes to disk + "test 'regex' 'string1'" # give a regex, reports if subsequent strings are valid + help + +NOTES: +[1] This means you could presumably do something like the following: + %perl -MFLAT -e command < text_file_with_1_regex_per_line.txt + ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +[2] This command compares the minimal DFAs of each regular expression; + if there exists a exact 1-1 mapping of symbols, states, and + transitions then the DFAs are considered equal. This means that + "abc" will be equal to "def" To make matters more confusing, "ab+ac" + would be equivalent to "xy+xz"; or worse yet, "z(x+y)". So to the + 'compare' command, "ab+ac" == "xy+xz" == "z(x+y)". This however + does not translate into the situation where "ab+ac" will accept + the same LITERAL strings as "z(x+y)" because the symbols are obviously + different. + +CREDITS: +Blockhead, CPAN.pm (for the example of how to implement these one liners), +and #perl on irc.freenode.net for pointing out something I missed when +trying to copy CPAN one liner majik. + +Perl FLAT and all included modules are released under the same terms as Perl +itself. Cheers. + +SEE: +http://www.0x743.com/flat + +END +} + +# save to a dat file +sub savedfa { + my $PRE = shift; + # neat a better way to get input via stdin + if (!$PRE) { + while (<>) { + chomp; + $PRE = $_; + last; + } + } + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + use FLAT::NFA; + use FLAT::DFA; + use Storable; + # caches results, loads them in if detexted + my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; + store $dfa, "$PRE.dat"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')" +sub test { + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + use FLAT::NFA; + use FLAT::DFA; + # handles multiple strings; first is considered the regex + if (@_) + { my $FA = FLAT::Regex::WithExtraOps->new(shift @_)->as_pfa()->as_nfa->as_dfa(); + foreach (@_) + { if ($FA->is_valid_string($_)) { + print "(+): $_\n"; + } else { + print "(-): $_\n"; + } + } + } else { + my $FA; + while (<STDIN>) { + chomp; + if ($. == 1) { #<-- uses first line as regex! + $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa->as_dfa(); + } else { + if ($FA->is_valid_string($_)) { + print "(+): $_\n"; + } else { + print "(-): $_\n"; + } + } + } + } +} + +# dumps parse tree +# Usage: +# perl -MFLAT -e "dump('re1','re2',...,'reN')" +# perl -MFLAT -e dump < list_of_regexes.dat +sub dump { + use FLAT::Regex::WithExtraOps; + use Data::Dumper; + if (@_) + { foreach (@_) + { my $PRE = FLAT::Regex::WithExtraOps->new($_); + print Dumper($PRE); }} + else + { while (<STDIN>) + { chomp; + my $PRE = FLAT::Regex::WithExtraOps->new($_); + print Dumper($PRE); } + } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "dfa2gv('a&b&c&d*e*')" +sub dfa2gv { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks(); + print $FA->as_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks(); + print $FA->as_graphviz;} + } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "nfa2gv('a&b&c&d*e*')" +sub nfa2gv { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_graphviz;} + } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "pfa2gv('a&b&c&d*e*')" +sub pfa2gv { + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_graphviz;} + } +} + +#as_undirected_graphviz + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "dfa2undgv('a&b&c&d*e*')" +sub dfa2undgv { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks(); + print $FA->as_undirected_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks(); + print $FA->as_undirected_graphviz;} + } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "nfa2undgv('a&b&c&d*e*')" +sub nfa2undgv { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_undirected_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_undirected_graphviz;} + } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "pfa2undgv('a&b&c&d*e*')" +sub pfa2undgv { + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_undirected_graphviz;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_undirected_graphviz;} + } +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "dfa2directed('a&b&c&d*e*')" +sub dfa2digraph { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + # trims sink states from min-dfa since transitions are gone + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); + print $FA->as_digraph;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); + print $FA->as_digraph;} + } + print "\n"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "nfa2directed('a&b&c&d*e*')" +sub nfa2digraph { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_digraph;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_digraph;} + } + print "\n"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')" +sub pfa2digraph { + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_digraph;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_digraph;} + } + print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "dfa2undirected('a&b&c&d*e*')" +sub dfa2undirected { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + # trims sink states from min-dfa since transitions are gone + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); + print $FA->as_undirected;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); + print $FA->as_undirected;} + } + print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "nfa2undirected('a&b&c&d*e*')" +sub nfa2undirected { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_undirected;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); + print $FA->as_undirected;} + } + print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2undirected('a&b&c&d*e*')" +sub pfa2undirected { + use FLAT::Regex::WithExtraOps; + use FLAT::PFA; + if (@_) + { foreach (@_) + { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_undirected;} } + else + { while (<STDIN>) + { chomp; + my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); + print $FA->as_undirected;} + } + print "\n"; +} + +# compares 2 give PREs +# Usage: +# perl -MFLAT -e "compare('a','a&b&c&d*e*')" #<-- no match, btw +sub compare { + use FLAT::Regex::WithExtraOps; + use FLAT::DFA; + use FLAT::PFA; + my $PFA1 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa(); + my $PFA2 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa(); + my $DFA1 = $PFA1->as_nfa->as_min_dfa; + my $DFA2 = $PFA2->as_nfa->as_min_dfa; + if ($DFA1->equals($DFA2)) { + print "Yes\n"; + } else { + print "No\n"; + } +} + +# prints random PRE +# Usage: +# perl -MFLAT -e random_pre +sub random_pre { + my $and_chance = shift; + # skirt around deep recursion warning annoyance + local $SIG{__WARN__} = sub { $_[0] =~ /^Deep recursion/ or warn $_[0] }; + srand $$; + my %CMDLINEOPTS = (); + # Percent chance of each operator occuring + $CMDLINEOPTS{LENGTH} = 32; + $CMDLINEOPTS{OR} = 6; + $CMDLINEOPTS{STAR} = 10; + $CMDLINEOPTS{OPEN} = 5; + $CMDLINEOPTS{CLOSE} = 0; + $CMDLINEOPTS{n} = 1; + $CMDLINEOPTS{AND} = 10; #<-- default + $CMDLINEOPTS{AND} = $and_chance if ($and_chance == 0); #<-- to make it just an re (no shuffle) + + + my $getRandomChar = sub { + my $ch = ''; + # Get a random character between 0 and 127. + do { + $ch = int(rand 2); + } while ($ch !~ m/[a-zA-Z0-9]/); + return $ch; + }; + + my $getRandomRE = sub { + my $str = ''; + my @closeparens = (); + for (1..$CMDLINEOPTS{LENGTH}) { + $str .= $getRandomChar->(); + # % chance of an "or" + if (int(rand 100) < $CMDLINEOPTS{OR}) { + $str .= "|1"; + } elsif (int(rand 100) < $CMDLINEOPTS{AND}) { + $str .= "&0"; + } elsif (int(rand 100) < $CMDLINEOPTS{STAR}) { + $str .= "*1"; + } elsif (int(rand 100) < $CMDLINEOPTS{OPEN}) { + $str .= "("; + push(@closeparens,'0101)'); + } elsif (int(rand 100) < $CMDLINEOPTS{CLOSE} && @closeparens) { + $str .= pop(@closeparens); + } + } + # empty out @closeparens if there are still some left + if (@closeparens) { + $str .= join('',@closeparens); + } + return $str; + }; + + for (1..$CMDLINEOPTS{n}) { + print $getRandomRE->(),"\n"; + } +} + +# prints random RE (no & operator) +# Usage: +# perl -MFLAT -e random_re +sub random_re { + shift->random_pre(0); +} + +1; + +__END__ + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/FLAT/CMD/AcyclicStrings.pm b/lib/FLAT/CMD/AcyclicStrings.pm new file mode 100644 index 0000000..ebc8840 --- /dev/null +++ b/lib/FLAT/CMD/AcyclicStrings.pm @@ -0,0 +1,54 @@ +# all strings available via acyclic path from the DFA start state to any all of the final states + +package FLAT::CMD::AcyclicStrings; +use base 'FLAT::CMD'; +use FLAT; +use FLAT::Regex::WithExtraOps; +use FLAT::PFA; +use FLAT::NFA; +use FLAT::DFA; +use Storable; +use Carp; + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(as_strings); + +sub AUTOLOAD { + my($l) = $AUTOLOAD; + $l =~ s/.*:://; + my(%EXPORT); + @EXPORT{@EXPORT} = ''; + if (exists $EXPORT{$l}){ + FLAT::CMD->$l(@_); + } +} + +use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa); +# acyclic - no cycles +sub as_strings { + my $PRE = shift; + # neat a better way to get input via stdin + if (!$PRE) { + while (<>) { + chomp; + $PRE = $_; + last; + } + } + # caches results, loads them in if detexted + my $RE = FLAT::Regex::WithExtraOps->new($PRE); + printf("%s\n",$RE->as_string()); + if (!-e "$PRE.dat") { + $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; + #store $dfa, "$PRE.dat"; + } else { + print STDERR "$PRE.dat found.."; + $dfa = retrieve "$PRE.dat"; + } + $dfa->as_acyclic_strings(); +} + +1; diff --git a/lib/FLAT/CMD/DFTStrings.pm b/lib/FLAT/CMD/DFTStrings.pm new file mode 100644 index 0000000..9dc5a59 --- /dev/null +++ b/lib/FLAT/CMD/DFTStrings.pm @@ -0,0 +1,55 @@ +# all strings available via depth first traversal, including back edges that happen to land on an accepting state + +package FLAT::CMD::DFTStrings; +use base 'FLAT::CMD'; +use FLAT; +use FLAT::Regex::WithExtraOps; +use FLAT::PFA; +use FLAT::NFA; +use FLAT::DFA; +use Storable; +use Carp; + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(as_strings); + +sub AUTOLOAD { + my($l) = $AUTOLOAD; + $l =~ s/.*:://; + my(%EXPORT); + @EXPORT{@EXPORT} = ''; + if (exists $EXPORT{$l}){ + FLAT::CMD->$l(@_); + } +} + +use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa); +# acyclic - no cycles +sub as_strings { + my $PRE = shift; + # neat a better way to get input via stdin + if (!$PRE) { + while (<>) { + chomp; + $PRE = $_; + last; + } + } + # caches results, loads them in if detexted + my $RE = FLAT::Regex::WithExtraOps->new($PRE); + printf("%s\n",$RE->as_string()); + if (!-e "$PRE.dat") { + $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; + #store $dfa, "$PRE.dat"; + } else { + print STDERR "$PRE.dat found.."; + $dfa = retrieve "$PRE.dat"; + } + + $dfa->as_dft_strings(shift); +} + +1; diff --git a/lib/FLAT/DFA.pm b/lib/FLAT/DFA.pm new file mode 100644 index 0000000..f96c9fb --- /dev/null +++ b/lib/FLAT/DFA.pm @@ -0,0 +1,557 @@ +package FLAT::DFA; + +use strict; +use base 'FLAT::NFA'; +use Storable qw(dclone); +use Carp; +$|++; + +sub set_starting { + my $self = shift; + $self->SUPER::set_starting(@_); + + my $num = () = $self->get_starting; + confess "DFA must have exactly one starting state" + if $num != 1; +} + +sub complement { + my $self = $_[0]->clone; + + for my $s ($self->get_states) { + $self->is_accepting($s) + ? $self->unset_accepting($s) + : $self->set_accepting($s); + } + + return $self; +} + +sub _TUPLE_ID { join "\0", @_ } +sub _uniq { my %seen; grep { !$seen{$_}++ } @_; } + +## this method still needs more work.. +sub intersect { + my @dfas = map { $_->as_dfa } @_; + + my $return = FLAT::DFA->new; + my %newstates; + my @alpha = _uniq( map { $_->alphabet } @dfas ); + + $_->_extend_alphabet(@alpha) for @dfas; + + my @start = map { $_->get_starting } @dfas; + my $start = $newstates{ _TUPLE_ID(@start) } = $return->add_states(1); + $return->set_starting($start); + $return->set_accepting($start) + if ! grep { ! $dfas[$_]->is_accepting( $start[$_] ) } 0 .. $#dfas; + + my @queue = (\@start); + while (@queue) { + my @tuple = @{ shift @queue }; + + for my $char (@alpha) { + my @next = map { $dfas[$_]->successors( $tuple[$_], $char ) } + 0 .. $#dfas; + + #warn "[@tuple] --> [@next] via $char\n"; + + if (not exists $newstates{ _TUPLE_ID(@next) }) { + my $s = $newstates{ _TUPLE_ID(@next) } = $return->add_states(1); + $return->set_accepting($s) + if ! grep { ! $dfas[$_]->is_accepting( $next[$_] ) } 0 .. $#dfas; + push @queue, \@next; + } + + $return->add_transition( $newstates{ _TUPLE_ID(@tuple) }, + $newstates{ _TUPLE_ID(@next) }, + $char ); + } + } + + return $return; +} + +# this is meant to enforce 1 starting state for a DFA, but it is getting us into trouble +# when a DFA object calls unset_starting +sub unset_starting { + my $self = shift; + $self->SUPER::unset_starting(@_); + my $num = () = $self->unset_starting; + croak "DFA must have exactly one starting state" + if $num != 1; +} + +#### transformations + +sub trim_sinks { + my $self = shift; + my $result = $self->clone(); + foreach my $state ($self->array_complement([$self->get_states()],[$self->get_accepting()])) { + my @ret = $self->successors($state,[$self->alphabet]); + if (@ret) { + if ($ret[0] == $state) { + $result->delete_states($state) if ($result->is_state($state)); + } + } + } + return $result; +} + +sub as_min_dfa { + + my $self = shift()->clone; + my $N = $self->num_states; + my @alphabet = $self->alphabet; + + my ($start) = $self->get_starting; + my %final = map { $_ => 1 } $self->get_accepting; + + my @equiv = map [ (0) x ($_+1), (1) x ($N-$_-1) ], 0 .. $N-1; + + while (1) { + my $changed = 0; + for my $s1 (0 .. $N-1) { + for my $s2 (grep { $equiv[$s1][$_] } 0 .. $N-1) { + + if ( 1 == grep defined, @final{$s1, $s2} ) { + $changed = 1; + $equiv[$s1][$s2] = 0; + next; + } + + for my $char (@alphabet) { + my @t = sort { $a <=> $b } $self->successors([$s1,$s2], $char); + next if @t == 1; + + if (not $equiv[ $t[0] ][ $t[1] ]) { + $changed = 1; + $equiv[$s1][$s2] = 0; + } + } + }} + + last if !$changed; + } + my $result = (ref $self)->new; + my %newstate; + my @classes; + for my $s (0 .. $N-1) { + next if exists $newstate{$s}; + + my @c = ( $s, grep { $equiv[$s][$_] } 0 .. $N-1 ); + push @classes, \@c; + + @newstate{@c} = ( $result->add_states(1) ) x @c; + } + + for my $c (@classes) { + my $s = $c->[0]; + for my $char (@alphabet) { + my ($next) = $self->successors($s, $char); + $result->add_transition( $newstate{$s}, $newstate{$next}, $char ); + } + } + + $result->set_starting( $newstate{$start} ); + $result->set_accepting( $newstate{$_} ) + for $self->get_accepting; + + $result; + +} + +# the validity of a given string <-- executes symbols over DFA +# if there is not transition for given state and symbol, it fails immediately +# if the current state we're in is not final when symbols are exhausted, then it fails + +sub is_valid_string { + my $self = shift; + my $string = shift; + chomp $string; + my $OK = undef; + my @stack = split('',$string); + # this is confusing all funcs return arrays + my @current = $self->get_starting(); + my $current = pop @current; + foreach (@stack) { + my @next = $self->successors($current,$_); + if (!@next) { + return $OK; #<--returns undef bc no transition found + } + $current = $next[0]; + } + $OK++ if ($self->is_accepting($current)); + return $OK; +} + +# +# Experimental!! +# + +# DFT stuff in preparation for DFA pump stuff; +sub as_node_list { + my $self = shift; + my %node = (); + for my $s1 ($self->get_states) { + $node{$s1} = {}; # initialize + for my $s2 ($self->get_states) { + my $t = $self->get_transition($s1, $s2); + if (defined $t) { + # array of symbols that $s1 will go to $s2 on... + push(@{$node{$s1}{$s2}},split(',',$t->as_string)); + } + } + } + return %node; +} + +sub as_acyclic_strings { + my $self = shift; + my %dflabel = (); # lookup table for dflable + my %backtracked = (); # lookup table for backtracked edges + my $lastDFLabel = 0; + my @string = (); + my %nodes = $self->as_node_list(); + # output format is the actual PRE followed by all found strings + $self->acyclic($self->get_starting(),\%dflabel,$lastDFLabel,\%nodes,\@string); +} + +sub acyclic { + my $self = shift; + my $startNode = shift; + my $dflabel_ref = shift; + my $lastDFLabel = shift; + my $nodes = shift; + my $string = shift; + # tree edge detection + if (!exists($dflabel_ref->{$startNode})) { + $dflabel_ref->{$startNode} = ++$lastDFLabel; # the order inwhich this link was explored + foreach my $adjacent (keys(%{$nodes->{$startNode}})) { + if (!exists($dflabel_ref->{$adjacent})) { # initial tree edge + foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) { + push(@{$string},$symbol); + $self->acyclic($adjacent,\%{$dflabel_ref},$lastDFLabel,\%{$nodes},\@{$string}); + if ($self->array_is_subset([$adjacent],[$self->get_accepting()])) { #< proof of concept + printf("%s\n",join('',@{$string})); + } + pop(@{$string}); + } + } + } + } + # remove startNode entry to facilitate acyclic path determination + delete($dflabel_ref->{$startNode}); + #$lastDFLabel--; + return; +}; + +sub as_dft_strings { + my $self = shift; + my $depth = 1; + $depth = shift if (1 < $_[0]); + my %dflabel = (); # scoped lookup table for dflable + my %nodes = $self->as_node_list(); + foreach (keys(%nodes)) { + $dflabel{$_} = []; # initialize container (array) for multiple dflables for each node + } + my $lastDFLabel = 0; + my @string = (); + $self->dft($self->get_starting(),[$self->get_accepting()],\%dflabel,$lastDFLabel,\%nodes,\@string,$depth); +} + +sub dft { + my $self = shift; + my $startNode = shift; + my $goals_ref = shift; + my $dflabel_ref = shift; + my $lastDFLabel = shift; + my $nodes = shift; + my $string = shift; + my $DEPTH = shift; + # add start node to path + my $c1 = @{$dflabel_ref->{$startNode}}; # get number of elements + if ($DEPTH >= $c1) { + push(@{$dflabel_ref->{$startNode}},++$lastDFLabel); + foreach my $adjacent (keys(%{$nodes->{$startNode}})) { + my $c2 = @{$dflabel_ref->{$adjacent}}; + if ($DEPTH > $c2) { # "initial" tree edge + foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) { + push(@{$string},$symbol); + $self->dft($adjacent,[@{$goals_ref}],$dflabel_ref,$lastDFLabel,$nodes,[@{$string}],$DEPTH); + # assumes some base path found + if ($self->array_is_subset([$adjacent],[@{$goals_ref}])) { + printf("%s\n",join('',@{$string})); + } + pop(@{$string}); + } + } + } # remove startNode entry to facilitate acyclic path determination + pop(@{$dflabel_ref->{$startNode}}); + $lastDFLabel--; + } +}; + +# +# String gen using iterators (still experimental) +# + +sub get_acyclic_sub { + my $self = shift; + my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel) = @_; + my @ret = (); + foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) { + $lastDFLabel++; + if (!exists($dflabel_ref->{$adjacent})) { + $dflabel_ref->{$adjacent} = $lastDFLabel; + foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) { + push(@{$string_ref},$symbol); + my $string_clone = dclone($string_ref); + my $dflabel_clone = dclone($dflabel_ref); + push(@ret,sub { return $self->get_acyclic_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel); }); + pop @{$string_ref}; + } + } + + } + return {substack=>[@ret], + lastDFLabel=>$lastDFLabel, + string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)}; +} +sub init_acyclic_iterator { + my $self = shift; + my %dflabel = (); + my @string = (); + my $lastDFLabel = 0; + my %nodelist = $self->as_node_list(); + my @accepting = $self->get_accepting(); + # initialize + my @substack = (); + my $r = $self->get_acyclic_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel); + push(@substack,@{$r->{substack}}); + return sub { + while (1) { + if (!@substack) { + return undef; + } + my $s = pop @substack; + my $r = $s->(); + push(@substack,@{$r->{substack}}); + if ($r->{string}) { + return $r->{string}; + } + } + } +} + +sub new_acyclic_string_generator { + my $self = shift; + return $self->init_acyclic_iterator(); +} + +sub get_deepdft_sub { + my $self = shift; + my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel,$max) = @_; + my @ret = (); + my $c1 = @{$dflabel_ref->{$start}}; + if ($c1 < $max) { + push(@{$dflabel_ref->{$start}},++$lastDFLabel); + foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) { + my $c2 = @{$dflabel_ref->{$adjacent}}; + if ($c2 < $max) { + foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) { + push(@{$string_ref},$symbol); + my $string_clone = dclone($string_ref); + my $dflabel_clone = dclone($dflabel_ref); + push(@ret,sub { return $self->get_deepdft_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel,$max); }); + pop @{$string_ref}; + } + } + } + } + return {substack=>[@ret], lastDFLabel=>$lastDFLabel, string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)}; +} + +sub init_deepdft_iterator { + my $self = shift; + my $MAXLEVEL = shift; + my %dflabel = (); + my @string = (); + my $lastDFLabel = 0; + my %nodelist = $self->as_node_list(); + foreach my $node (keys(%nodelist)) { + $dflabel{$node} = []; # initializes anonymous arrays for all nodes + } + my @accepting = $self->get_accepting(); + # initialize + my @substack = (); + my $r = $self->get_deepdft_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel,$MAXLEVEL); + push(@substack,@{$r->{substack}}); + return sub { + while (1) { + if (!@substack) { + return undef; + } + my $s = pop @substack; + my $r = $s->(); + push(@substack,@{$r->{substack}}); + if ($r->{string}) { + return $r->{string}; + } + } + } +} + +sub new_deepdft_string_generator { + my $self = shift; + my $MAXLEVEL = (@_ ? shift : 1); + return $self->init_deepdft_iterator($MAXLEVEL); +} + +1; + +__END__ + +=head1 NAME + +FLAT::DFA - Deterministic finite automata + +=head1 SYNOPSIS + +A FLAT::DFA object is a finite automata whose transitions are labeled +with single characters. Furthermore, each state has exactly one outgoing +transition for each available label/character. + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>, +FLAT::DFA objects provide the following DFA-specific methods: + +=over + +=item $dfa-E<gt>unset_starting + +Because a DFA, by definition, must have only ONE starting state, this allows one to unset +the current start state so that a new one may be set. + +=item $dfa-E<gt>trim_sinks + +This method returns a FLAT::DFA (though in theory an NFA) that is lacking a transition for +all symbols from all states. This method eliminates all transitions from all states that lead +to a sink state; it also eliminates the sink state. + +This has no affect on testing if a string is valid using C<FLAT::DFA::is_valid_string>, +discussed below. + +=item $dfa-E<gt>as_min_dfa + +This method minimizes the number of states and transitions in the given DFA. The modifies +the current/calling DFA object. + +=item $dfa-E<gt>is_valid_string($string) + +This method tests if the given string is accepted by the DFA. + +=item $dfa-E<gt>as_node_list + +This method returns a node list in the form of a hash. This node list may be viewed as a +pure digraph, and is lacking in state names and transition symbols. + +=item $dfa-E<gt>as_acyclic_strings + +The method is B<deprecated>, and it is suggested that one not use it. It returns all +valid strings accepted by the DFA by exploring all acyclic paths that go from the start +state and end in an accepting state. The issue with this method is that it finds and +returns all strings at once. The iterator described below is much more ideal for actual +use in an application. + +=item $dfa-E<gt>as_dft_strings($depth) + +The method is B<deprecated>, and it is suggested that one not use it. It returns all +valid strings accepted by the DFA using a depth first traversal. A valid string is formed +when the traversal detects an accepting state, whether it is a terminal node or a node reached +via a back edge. The issue with this method is that it finds and returns all strings at once. +The iterator described below is much more ideal for actual use in an application. + +The argument, C<$depth> specifies how many times the traversal may actually pass through +a previously visited node. It is therefore possible to safely explore DFAs that accept +infinite languages. + +=item $dfa-E<gt>new_acyclic_string_generator + +This allows one to initialize an iterator that returns a valid string on each successive +call of the sub-ref that is returned. It returns all valid strings accepted by the DFA by +exploring all acyclic paths that go from the start state and end in an accepting state. + +Example: + + #!/usr/bin/env perl + use strict; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + use FLAT::Regex::WithExtraOps; + + my $PRE = "abc&(def)*"; + my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; + my $next = $dfa->new_acyclic_string_generator; + print "PRE: $PRE\n"; + print "Acyclic:\n"; + while (my $string = $next->()) { + print " $string\n"; + } + +=item $dfa-E<gt>new_deepdft_string_generator($depth) + +This allows one to initialize an iterator that returns a valid string on each successive +call of the sub-ref that is returned. It returns all valid strings accepted by the DFA using a +depth first traversal. A valid string is formed when the traversal detects an accepting state, +whether it is a terminal node or a node reached via a back edge. + +The argument, C<$depth> specifies how many times the traversal may actually pass through +a previously visited node. It is therefore possible to safely explore DFAs that accept +infinite languages. + + #!/usr/bin/env perl + use strict; + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + use FLAT::Regex::WithExtraOps; + + my $PRE = "abc&(def)*"; + my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; + my $next = $dfa->new_deepdft_string_generator(); + print "Deep DFT (default):\n"; + for (1..10) { + while (my $string = $next->()) { + print " $string\n"; + last; + } + } + + $next = $dfa->new_deepdft_string_generator(5); + print "Deep DFT (5):\n"; + for (1..10) { + while (my $string = $next->()) { + print " $string\n"; + last; + } + } + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/FA.pm b/lib/FLAT/FA.pm new file mode 100644 index 0000000..dd77f50 --- /dev/null +++ b/lib/FLAT/FA.pm @@ -0,0 +1,554 @@ +package FLAT::FA; + +use strict; +use base 'FLAT'; +use Carp; + +use FLAT::Transition; + +=head1 NAME + +FLAT::FA - Base class for regular finite automata + +=head1 SYNOPSIS + +A FLAT::FA object is a collection of states and transitions. Each state +may be labeled as starting or accepting. Each transition between states +is labeled with a transition object. + +=head1 USAGE + +FLAT::FA is a superclass that is not intended to be used directly. However, +it does provide the following methods: + +=cut + +sub new { + my $pkg = shift; + bless { + STATES => [], + TRANS => [], + ALPHA => {} + }, $pkg; +} + +sub get_states { + my $self = shift; + return 0 .. ($self->num_states - 1); +} + +sub num_states { + my $self = shift; + return scalar @{ $self->{STATES} }; +} + +sub is_state { + my ($self, $state) = @_; + exists $self->{STATES}->[$state]; +} + +sub _assert_states { + my ($self, @states) = @_; + for (@states) { + croak "'$_' is not a state" if not $self->is_state($_); + } +} +sub _assert_non_states { + my ($self, @states) = @_; + for (@states) { + croak "There is already a state called '$_'" if $self->is_state($_); + } +} + +sub delete_states { + my ($self, @states) = @_; + + $self->_assert_states(@states); + + for my $s ( sort { $b <=> $a } @states ) { + $self->_decr_alphabet($_) + for @{ splice @{ $self->{TRANS} }, $s, 1 }; + + $self->_decr_alphabet( splice @$_, $s, 1 ) + for @{ $self->{TRANS} }; + + splice @{ $self->{STATES} }, $s, 1; + } +} + +sub add_states { + my ($self, $num) = @_; + my $id = $self->num_states; + + for my $s ( $id .. ($id+$num-1) ) { + push @$_, undef for @{ $self->{TRANS} }; + push @{ $self->{TRANS} }, [ (undef) x ($s+1) ]; + push @{ $self->{STATES} }, { + starting => 0, + accepting => 0 + }; + } + + return wantarray ? ($id .. ($id+$num-1)) + : $id+$num-1; +} + +############## + +sub is_starting { + my ($self, $state) = @_; + $self->_assert_states($state); + return $self->{STATES}[$state]{starting}; +} +sub set_starting { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{starting} = 1 for @states; +} +sub unset_starting { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{starting} = 0 for @states; +} +sub get_starting { + my $self = shift; + return grep { $self->is_starting($_) } $self->get_states; +} + +############## + +sub is_accepting { + my ($self, $state) = @_; + $self->_assert_states($state); + return $self->{STATES}[$state]{accepting}; +} +sub set_accepting { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{accepting} = 1 for @states; +} +sub unset_accepting { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{accepting} = 0 for @states; +} +sub get_accepting { + my $self = shift; + return grep { $self->is_accepting($_) } $self->get_states; +} + +############### + +sub _decr_alphabet { + my ($self, $t) = @_; + return if not defined $t; + for ($t->alphabet) { + delete $self->{ALPHA}{$_} if not --$self->{ALPHA}{$_}; + } +} +sub _incr_alphabet { + my ($self, $t) = @_; + return if not defined $t; + $self->{ALPHA}{$_}++ for $t->alphabet; +} + +sub set_transition { + my ($self, $state1, $state2, @label) = @_; + $self->remove_transition($state1, $state2); + + @label = grep defined, @label; + return if not @label; + + my $t = $self->{TRANS_CLASS}->new(@label); + $self->_incr_alphabet($t); + + $self->{TRANS}[$state1][$state2] = $t; +} + +sub add_transition { + my ($self, $state1, $state2, @label) = @_; + + @label = grep defined, @label; + return if not @label; + + my $t = $self->get_transition($state1, $state2); + $self->_decr_alphabet($t); + + if (!$t) { + $t = $self->{TRANS}[$state1][$state2] = $self->{TRANS_CLASS}->new; + } + + $t->add(@label); + $self->_incr_alphabet($t); +} + +sub get_transition { + my ($self, $state1, $state2) = @_; + $self->_assert_states($state1, $state2); + + $self->{TRANS}[$state1][$state2]; +} + +sub remove_transition { + my ($self, $state1, $state2) = @_; + + $self->_decr_alphabet( $self->{TRANS}[$state1][$state2] ); + $self->{TRANS}[$state1][$state2] = undef; +} + +# given a state and a symbol, it tells you +# what the next state(s) are; do get successors +# for find the successors for a set of symbols, +# use array refs. For example: +# @NEXT=$self->successors([@nodes],[@symbols]); +sub successors { + my ($self, $state, $symb) = @_; + + my @states = ref $state eq 'ARRAY' ? @$state : ($state); + my @symbs = defined $symb + ? (ref $symb eq 'ARRAY' ? @$symb : ($symb)) + : (); + + $self->_assert_states(@states); + + my %succ; + for my $s (@states) { + $succ{$_}++ + for grep { my $t = $self->{TRANS}[$s][$_]; + defined $t && (@symbs ? $t->does(@symbs) : 1) } $self->get_states; + } + + return keys %succ; +} + +sub predecessors { + my $self = shift; + $self->clone->reverse->successors(@_); +} + +# reverse - no change from NFA +sub reverse { + my $self = $_[0]->clone; + $self->_transpose; + + my @start = $self->get_starting; + my @final = $self->get_accepting; + + $self->unset_accepting( $self->get_states ); + $self->unset_starting( $self->get_states ); + + $self->set_accepting( @start ); + $self->set_starting( @final ); + + $self; +} + +# get an array of all symbols +sub alphabet { + my $self = shift; + grep length, keys %{ $self->{ALPHA} }; +} + +# give an array of symbols, return the symbols that +# are in the alphabet +#sub is_in_alphabet { +# my $self = shift; +# my $ +#} + +############ +sub prune { + my $self = shift; + + my @queue = $self->get_starting; + my %seen = map { $_ => 1 } @queue; + + while (@queue) { + @queue = grep { ! $seen{$_}++ } $self->successors(\@queue); + } + + my @useless = grep { !$seen{$_} } $self->get_states; + $self->delete_states(@useless); + + return @useless; +} + + +############ + +use Storable 'dclone'; +sub clone { + dclone( $_[0] ); +} + +sub _transpose { + my $self = shift; + my $N = $self->num_states - 1; + + $self->{TRANS} = [ + map { + my $row = $_; + [ map { $_->[$row] } @{$self->{TRANS}} ] + } 0 .. $N + ]; +} + +# tests to see if set1 is a subset of set2 +sub array_is_subset { + my $self = shift; + my $set1 = shift; + my $set2 = shift; + my $ok = 1; + my %setcount = (); + foreach ($self->array_unique(@{$set1}),$self->array_unique(@{$set2})) { + $setcount{$_}++; + } + foreach ($self->array_unique(@{$set1})) { + if ($setcount{$_} != 2) { + $ok = 0; + last; + } + } + return $ok; +} + +sub array_unique { + my $self = shift; + my %ret = (); + foreach (@_) { + $ret{$_}++; + } + return keys(%ret); +} + +sub array_complement { + my $self = shift; + my $set1 = shift; + my $set2 = shift; + my @ret = (); + # convert set1 to a hash + my %set1hash = map {$_ => 1} @{$set1}; + # iterate of set2 and test if $set1 + foreach (@{$set2}) { + if (!defined $set1hash{$_}) { + push(@ret,$_); + } + } + ## Now do the same using $set2 + # convert set2 to a hash + my %set2hash = map {$_ => 1} @{$set2}; + # iterate of set1 and test if $set1 + foreach (@{$set1}) { + if (!defined $set2hash{$_}) { + push(@ret,$_); + } + } + # now @ret contains all items in $set1 not in $set 2 and all + # items in $set2 not in $set1 + return @ret; +} + +# returns all items that 2 arrays have in common +sub array_intersect { + my $self = shift; + my $set1 = shift; + my $set2 = shift; + my %setcount = (); + my @ret = (); + foreach ($self->array_unique(@{$set1})) { + $setcount{$_}++; + } + foreach ($self->array_unique(@{$set2})) { + $setcount{$_}++; + push(@ret,$_) if ($setcount{$_} > 1); + } + return @ret; +} + +# given a set of symbols, returns only the valid ones +sub get_valid_symbols { + my $self = shift; + my $symbols = shift; + return $self->array_intersect([$self->alphabet()],[@{$symbols}]) +} + +## add an FA's states & transitions to this FA (as disjoint union) +sub _swallow { + my ($self, $other) = @_; + my $N1 = $self->num_states; + my $N2 = $other->num_states; + + push @$_, (undef) x $N2 + for @{ $self->{TRANS} }; + + push @{ $self->{TRANS} }, [ (undef) x $N1, @{ clone $_ } ] + for @{ $other->{TRANS} }; + + push @{ $self->{STATES} }, @{ clone $other->{STATES} }; + + $self->{ALPHA}{$_} += $other->{ALPHA}{$_} + for keys %{ $other->{ALPHA} }; + + return map { $_ + $N1 } $other->get_states; +} + +1; + +__END__ + + +=head2 Manipulation & Inspection Of States + +=over + +=item $fa-E<gt>get_states + +Returns a list of all the state "names" in $fa. + +=item $fa-E<gt>num_states + +Returns the number of states in $fa. + +=item $fa-E<gt>is_state($state_id) + +Returns a boolean indicating whether $state_id is a recognized state "name." + +=item $fa-E<gt>delete_states(@states) + +Deletes the states given in @states and their corresponding transitions. The +remaining states in the FA may be "renamed" (renumbered)! Return value not +used. + +=item $fa-E<gt>add_states($num) + +Adds $num states to $fa, and returns a list of the new state "names." + +=item $fa-E<gt>get_starting + +=item $fa-E<gt>get_accepting + +Returns a list of all the states which are labeled as starting/accepting, +respectively. + +=item $fa-E<gt>set_accepting(@states) + +=item $fa-E<gt>unset_accepting(@states) + +=item $fa-E<gt>set_starting(@states) + +=item $fa-E<gt>unset_starting(@states) + +Sets/unsets a list of states as being labeled starting/accepting, +respectively. + +=item $fa-E<gt>is_starting($state) + +=item $fa-E<gt>is_accepting($state) + +Returns a boolean indicating whether $state is labeled as starting/accepting, +respectively. + +=item $fa-E<gt>prune + +Deletes the states which are not reachable (via zero or more transitions) +from starting states. Returns a list of the "names" of states that were +deleted. + +=back + +=head2 Manipulation & Inspection Of Transitions + +Each transition between states is a transition object, which knows how +to organize several "labels." Think of this as the mechanism by which +multiple arrows in the state diagram between the same states are collapsed +to a single arrow. This interface is abstracted away into the following +public methods: + +=over + +=item $fa-E<gt>set_transition($state1, $state2, @labels) + +Resets the transition between $state1 and $state2 to a transition +initialized using data @labels. If @labels is omitted or contains +only undefined elements, then the call is equivalent to C<remove_transition>. + +=item $fa-E<gt>add_transition($state1, $state2, @labels) + +Adds @labels to the transition between $state1 and $state2. + +=item $fa-E<gt>get_transition($state1, $state2) + +Returns the transition object stored between $state1 and $state2, or +undef if there is no transition. + +=item $fa-E<gt>remove_transition($state1, $state2) + +Removes the transition object between $state1 and $state2. + +=item $fa-E<gt>successors(\@states) + +=item $fa-E<gt>successors($state) + +=item $fa-E<gt>successors(\@states, $label) + +=item $fa-E<gt>successors($state, $label) + +=item $fa-E<gt>successors(\@states, \@labels) + +=item $fa-E<gt>successors($state, \@labels) + +Given a state/set of states, and one or more labels, returns a list of +the states (without duplicates) reachable from the states via a single +transition having any of the given labels. If no labels are given, returns +the states reachable by any (single) transition. + +Note that this method makes no distinction for epsilon transitions, these +are only special in FLAT::NFA objects. + +=item $fa-E<gt>alphabet + +Returns the list of characters (without duplicates) used among all +transition labels in the automaton. + +=back + +=head2 Conversions To External Formats + +=over + +=item $fa-E<gt>as_graphviz + +Returns a string containing a GraphViz (dot) description of the automaton, +suitable for rendering with your favorite GraphViz layout engine. + +=item $fa-E<gt>as_summary + +Returns a string containing a plaintext description of the automaton, +suitable for debugging purposes. + +=back + +=head2 Miscellaneous + +=over + +=item $fa-E<gt>clone + +Returns an identical copy of $fa. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/NFA.pm b/lib/FLAT/NFA.pm new file mode 100644 index 0000000..344ea76 --- /dev/null +++ b/lib/FLAT/NFA.pm @@ -0,0 +1,509 @@ +package FLAT::NFA; + +use strict; +use base 'FLAT::FA'; + +use FLAT::Transition; + +=head1 NAME + +FLAT::NFA - Nondeterministic finite automata + +=head1 SYNOPSIS + +A FLAT::NFA object is a finite automata whose transitions are labeled +either with characters or the empty string (epsilon). + +=cut + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new(@_); + $self->{TRANS_CLASS} = "FLAT::Transition"; + return $self; +} + +sub singleton { + my ($class, $char) = @_; + my $nfa = $class->new; + + if (not defined $char) { + $nfa->add_states(1); + $nfa->set_starting(0); + } elsif ($char eq "") { + $nfa->add_states(1); + $nfa->set_starting(0); + $nfa->set_accepting(0); + } else { + $nfa->add_states(2); + $nfa->set_starting(0); + $nfa->set_accepting(1); + $nfa->set_transition(0, 1, $char); + } + return $nfa; +} + +sub as_nfa { $_[0]->clone } + +sub union { + my @nfas = map { $_->as_nfa } @_; + my $result = $nfas[0]->clone; + $result->_swallow($_) for @nfas[1 .. $#nfas]; + $result; +} + +sub concat { + my @nfas = map { $_->as_nfa } @_; + + my $result = $nfas[0]->clone; + my @newstate = ([ $result->get_states ]); + my @start = $result->get_starting; + + for (1 .. $#nfas) { + push @newstate, [ $result->_swallow( $nfas[$_] ) ]; + } + + $result->unset_accepting($result->get_states); + $result->unset_starting($result->get_states); + $result->set_starting(@start); + + for my $nfa_id (1 .. $#nfas) { + for my $s1 ($nfas[$nfa_id-1]->get_accepting) { + for my $s2 ($nfas[$nfa_id]->get_starting) { + $result->set_transition( + $newstate[$nfa_id-1][$s1], + $newstate[$nfa_id][$s2], "" ); + }} + } + + $result->set_accepting( + @{$newstate[-1]}[ $nfas[-1]->get_accepting ] ); + + $result; +} + +sub kleene { + my $result = $_[0]->clone; + + my ($newstart, $newfinal) = $result->add_states(2); + + $result->set_transition($newstart, $_, "") + for $result->get_starting; + $result->unset_starting( $result->get_starting ); + $result->set_starting($newstart); + + $result->set_transition($_, $newfinal, "") + for $result->get_accepting; + $result->unset_accepting( $result->get_accepting ); + $result->set_accepting($newfinal); + + $result->set_transition($newstart, $newfinal, ""); + $result->set_transition($newfinal, $newstart, ""); + + $result; +} + +sub reverse { + my $self = $_[0]->clone; + $self->_transpose; + + my @start = $self->get_starting; + my @final = $self->get_accepting; + + $self->unset_accepting( $self->get_states ); + $self->unset_starting( $self->get_states ); + + $self->set_accepting( @start ); + $self->set_starting( @final ); + + $self; +} + +########### + +sub is_empty { + my $self = shift; + + my @queue = $self->get_starting; + my %seen = map { $_ => 1 } @queue; + + while (@queue) { + return 0 if grep { $self->is_accepting($_) } @queue; + @queue = grep { !$seen{$_}++ } $self->successors(\@queue); + } + return 1; +} + +sub is_finite { + my $self = shift; + + my @alphabet = $self->alphabet; + return 1 if @alphabet == 0; + + my @queue = $self->get_starting; + my %seen = map { $_ => 1 } @queue; + + while (@queue) { + @queue = grep { !$seen{$_}++ } $self->successors(\@queue); + } + + for my $s ( grep { $self->is_accepting($_) } keys %seen ) { + @queue = $self->epsilon_closure($s); + %seen = map { $_ => 1 } @queue; + + while (@queue) { + my @next = $self->epsilon_closure( + $self->successors(\@queue, \@alphabet) ); + + return 0 if grep { $s eq $_ } @next; + @queue = grep { !$seen{$_}++ } @next; + } + } + return 1; +} + +sub epsilon_closure { + my ($self, @states) = @_; + my %seen = map { $_ => 1 } @states; + my @queue = @states; + + while (@queue) { + @queue = grep { ! $seen{$_}++ } $self->successors( \@queue, "" ); + } + + keys %seen; +} + + +sub contains { + my ($self, $string) = @_; + + my @active = $self->epsilon_closure( $self->get_starting ); + for my $char (split //, $string) { + return 0 if ! @active; + @active = $self->epsilon_closure( $self->successors(\@active, $char) ); + } + return !! grep { $self->is_accepting($_) } @active; +} + +sub trace { + my ($self, $string) = @_; + + my @trace = ([ $self->epsilon_closure( $self->get_starting ) ]); + + for my $char (split //, $string) { + push @trace, + [ $self->epsilon_closure( $self->successors($trace[-1], $char) ) ]; + } + return @trace; +} +############ + +sub _extend_alphabet { + my ($self, @alpha) = @_; + + my %alpha = map { $_ => 1 } @alpha; + delete $alpha{$_} for $self->alphabet; + + return if not keys %alpha; + + my $trash = $self->add_states(1); + for my $state ($self->get_states) { + next if $state eq $trash; + for my $char (keys %alpha) { + $self->add_transition($state, $trash, $char); + } + } + $self->add_transition($trash, $trash, $self->alphabet); +} + +######## transformations + +# subset construction +sub as_dfa { + my $self = shift; + + my $result = FLAT::DFA->new; + my %subset; + + my %final = map { $_ => 1 } $self->get_accepting; + my @start = sort { $a <=> $b } $self->epsilon_closure( $self->get_starting ); + + my $start = $subset{ _SET_ID(@start) } = $result->add_states(1); + $result->set_starting($start); + + $result->set_accepting( $start ) + if grep $_, @final{@start}; + + my @queue = (\@start); + while (@queue) { + my @states = @{ shift @queue }; + my $S = $subset{ _SET_ID(@states) }; + + for my $symb ($self->alphabet) { + my @to = $self->epsilon_closure( + $self->successors(\@states, $symb) ); + + if ( not exists $subset{_SET_ID(@to)} ) { + push @queue, \@to; + my $T = $subset{_SET_ID(@to)} = $result->add_states(1); + $result->set_accepting($T) + if grep $_, @final{@to}; + } + + $result->add_transition($S, $subset{ _SET_ID(@to) }, $symb); + } + } + + $result; +} + +############ Formatted output + +# Format that Dr. Sukhamay KUNDU likes to use in his assignments :) +# This format is just a undirected graph - so transition and state info is lost + +sub as_undirected { + my $self = shift; + my @symbols = $self->alphabet(); + my @states = $self->get_states(); + my %edges = (); + foreach (@states) { + my $s = $_; + foreach (@symbols) { + my $a = $_; + # foreach state, get all nodes connected to it; ignore symbols and + # treat transitions simply as directed + push(@{$edges{$s}},$self->successors($s,$a)); + foreach ($self->successors($s,$a)) { + push(@{$edges{$_}},$s); + } + } + } + my @lines = (($#states+1)); + foreach (sort{$a <=> $b;}(keys(%edges))) { #<-- iterate over numerically sorted list of keys + @{$edges{$_}} = sort {$a <=> $b;} $self->array_unique(@{$edges{$_}}); #<- make items unique and sort numerically + push(@lines,sprintf("%s(%s):%s",$_,($#{$edges{$_}}+1),join(' ',@{$edges{$_}}))); + } + return join("\n",@lines); + } + +# Format that Dr. Sukhamay KUNDU likes to use in his assignments :) +# This format is just a directed graph - so transition and state info is lost + +sub as_digraph { + my $self = shift; + my @symbols = $self->alphabet(); + my @states = $self->get_states(); + my @lines = (); + foreach (@states) { + my $s = $_; + my @edges = (); + foreach (@symbols) { + my $a = $_; + # foreach state, get all nodes connected to it; ignore symbols and + # treat transitions simply as directed + push(@edges,$self->successors($s,$a)); + } + @edges = sort {$a <=> $b;} $self->array_unique(@edges); #<- make items unique and sort numerically + push(@lines,sprintf("%s(%s): %s",$s,($#edges+1),join(' ',@edges))); + } + return sprintf("%s\n%s",($#states+1),join("\n",@lines)); +} + + +# Graph Description Language, aiSee, etc +sub as_gdl { + my $self = shift; + + my @states = map { + sprintf qq{node: { title:"%s" shape:circle borderstyle: %s}\n}, + $_, + ($self->is_accepting($_) ? "double bordercolor: red" : "solid") + } $self->get_states; + + my @trans; + for my $s1 ($self->get_states) { + for my $s2 ($self->get_states) { + my $t = $self->get_transition($s1, $s2); + + if (defined $t) { + push @trans, sprintf qq[edge: { source: "%s" target: "%s" label: "%s" arrowstyle: line }\n], + $s1, $s2, $t->as_string; + } + }} + + return sprintf "graph: {\ndisplay_edge_labels: yes\n\n%s\n%s}\n", + join("", @states), + join("", @trans); +} + +# Graphviz: dot, etc +## digraph, directed +sub as_graphviz { + my $self = shift; + + my @states = map { + sprintf qq{%s [label="%s",shape=%s]\n}, + $_, + ($self->is_starting($_) ? "start ($_)" : "$_"), + ($self->is_accepting($_) ? "doublecircle" : "circle") + } $self->get_states; + + my @trans; + for my $s1 ($self->get_states) { + for my $s2 ($self->get_states) { + my $t = $self->get_transition($s1, $s2); + + if (defined $t) { + push @trans, sprintf qq[%s -> %s [label="%s"]\n], + $s1, $s2, $t->as_string; + } + }} + + return sprintf "digraph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n", + join("", @states), + join("", @trans); +} +## undirected +sub as_undirected_graphviz { + my $self = shift; + + my @states = map { + sprintf qq{%s [label="%s",shape=%s]\n}, + $_, + ("$_"), + ("circle") + } $self->get_states; + + my @trans; + for my $s1 ($self->get_states) { + for my $s2 ($self->get_states) { + my $t = $self->get_transition($s1, $s2); + + if (defined $t) { + push @trans, sprintf qq[%s -- %s\n], + $s1, $s2, $t->as_string; + } + }} + + return sprintf "graph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n", + join("", @states), + join("", @trans); +} + +sub _SET_ID { return join "\0", sort { $a <=> $b } @_; } + +sub as_summary { + my $self = shift; + my $out = ''; + $out .= sprintf ("States : "); + my @start; + my @final; + foreach ($self->get_states()) { + $out .= sprintf "'$_' "; + if ($self->is_starting($_)) { + push(@start,$_); + } + if ($self->is_accepting($_)) { + push(@final,$_); + } + } + $out .= sprintf ("\nStart State : '%s'\n",join('',@start)); + $out .= sprintf ("Final State(s) : "); + foreach (@final) { + $out .= sprintf "'$_' "; + } + $out .= sprintf ("\nAlphabet : "); + foreach ($self->alphabet()) { + $out .= sprintf "'$_' "; + } + $out .= sprintf ("\nTransitions :\n"); + my @trans; + for my $s1 ($self->get_states) { + for my $s2 ($self->get_states) { + my $t = $self->get_transition($s1, $s2); + if (defined $t) { + push @trans, sprintf qq[%s -> %s on "%s"\n], + $s1, $s2, $t->as_string; + } + }} + $out .= join('',@trans); + return $out; +} + +1; + +__END__ + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT>, FLAT::NFA +objects provide the following NFA-specific methods: + +=over + +=item $nfa-E<gt>epsilon_closure(@states) + +Returns the set of states (without duplicates) which are reachable from +@states via zero or more epsilon-labeled transitions. + +=item $nfa-E<gt>trace($string) + +Returns a list of N+1 arrayrefs, where N is the length of $string. The +I-th arrayref contains the states which are reachable from the starting +state(s) of $nfa after reading I characters of $string. Correctly accounts +for epsilon transitions. + +=item $nfa-E<gt>as_undirected + +Outputs FA in a format that may be easily read into an external program as +a description of an undirected graph. + +=item $nfa-E<gt>as_digraph + +Outputs FA in a format that may be easily read into an external program as +a description of an directed graph. + +=item $nfa-E<gt>as_gdl + +Outputs FA in Graph Description Language (GDL), including directed transitions +with symbols and state names labeled. + +=item $nfa-E<gt>as_graphviz + +Outputs FA in Graphviz format, including directed transitions with symbols and +and state names labeled. This output may be directly piped into any of the +Graphviz layout programs, and in turn one may output an image using a single +commandline instruction. C<fash> uses this function to implement its "nfa2gv" +command: + + fash nfa2gv "a*b" | dot -Tpng > nfa.png + +=item $nfa-E<gt>as_undirected_graphviz + +Outputs FA in Graphviz format, with out the directed transitions or labels. +The output is suitable for any of the Graphvize layout programs, as discussed +above. + +=item $nfa-E<gt>as_summary + +Outputs a summary of the FA, including its states, symbols, and transition matrix. +It is useful for manually validating what the FA looks like. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/PFA.pm b/lib/FLAT/PFA.pm new file mode 100644 index 0000000..5557525 --- /dev/null +++ b/lib/FLAT/PFA.pm @@ -0,0 +1,293 @@ +package FLAT::PFA; +use strict; +use base 'FLAT::NFA'; +use Carp; + +use FLAT::Transition; + +use constant LAMBDA => '#lambda'; + +# Note: in a PFA, states are made up of active nodes. In this implementation, we have +# decided to retain the functionality of the state functions in FA.pm, although the entities +# being manipulated are technically nodes, not states. States are only explicitly tracked +# once the PFA is serialized into an NFA. Therefore, the TRANS member of the PFA object is +# the nodal transition function, gamma. The state transition function, delta, is not used +# in anyway, but is derived out of the PFA->NFA conversion process. + + +# The new way of doing things eliminated from PFA.pm of FLAT::Legacy is the +# need to explicitly track: start nodes, final nodes, symbols, and lambda & epsilon symbols, + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new(@_); # <-- SUPER is FLAT::NFA + return $self; +} + +# Singleton is no different than the NFA singleton +sub singleton { + my ($class, $char) = @_; + my $pfa = $class->new; + if (not defined $char) { + $pfa->add_states(1); + $pfa->set_starting(0); + } elsif ($char eq "") { + $pfa->add_states(1); + $pfa->set_starting(0); + $pfa->set_accepting(0); + } else { + $pfa->add_states(2); + $pfa->set_starting(0); + $pfa->set_accepting(1); + $pfa->set_transition(0, 1, $char); + } + return $pfa; +} + +# attack of the clones +sub as_pfa { $_[0]->clone() } + +sub set_starting { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{starting} = 1 for @states; +} + +# Creates a single start state with epsilon transitions from +# the former start states; +# Creates a single final state with epsilon transitions from +# the former accepting states +sub pinch { + my $self = shift; + my $symbol = shift; + my @starting = $self->get_starting; + if (@starting > 1) { + my $newstart = $self->add_states(1); + map {$self->add_transition($newstart,$_,$symbol)} @starting; + $self->unset_starting(@starting); + $self->set_starting($newstart); + } + # + my @accepting = $self->get_accepting; + if (@accepting > 1) { + my $newfinal = $self->add_states(1); + map {$self->add_transition($_,$newfinal,$symbol)} @accepting; + $self->unset_accepting(@accepting); + $self->set_accepting($newfinal); + } + return; +} + +# Implement the joining of two PFAs with lambda transitions +# Note: using epsilon pinches for simplicity +sub shuffle { + my @pfas = map { $_->as_pfa } @_; + my $result = $pfas[0]->clone; + $result->_swallow($_) for @pfas[1 .. $#pfas]; + $result->pinch(LAMBDA); + $result; +} + +############## + +sub is_tied { + my ($self, $state) = @_; + $self->_assert_states($state); + return $self->{STATES}[$state]{tied}; +} + +sub set_tied { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{tied} = 1 for @states; +} + +sub unset_tied { + my ($self, @states) = @_; + $self->_assert_states(@states); + $self->{STATES}[$_]{tied} = 0 for @states; +} + +sub get_tied { + my $self = shift; + return grep { $self->is_tied($_) } $self->get_states; +} + +############## + +# joins two PFAs in a union (or) - no change from NFA +sub union { + my @pfas = map { $_->as_pfa } @_; + my $result = $pfas[0]->clone; + $result->_swallow($_) for @pfas[1 .. $#pfas]; + $result->pinch(''); + $result; +} + +# joins two PFAs via concatenation - no change from NFA +sub concat { + my @pfas = map { $_->as_pfa } @_; + + my $result = $pfas[0]->clone; + my @newstate = ([ $result->get_states ]); + my @start = $result->get_starting; + + for (1 .. $#pfas) { + push @newstate, [ $result->_swallow( $pfas[$_] ) ]; + } + + $result->unset_accepting($result->get_states); + $result->unset_starting($result->get_states); + $result->set_starting(@start); + + for my $pfa_id (1 .. $#pfas) { + for my $s1 ($pfas[$pfa_id-1]->get_accepting) { + for my $s2 ($pfas[$pfa_id]->get_starting) { + $result->set_transition( + $newstate[$pfa_id-1][$s1], + $newstate[$pfa_id][$s2], "" ); + }} + } + + $result->set_accepting( + @{$newstate[-1]}[ $pfas[-1]->get_accepting ] ); + + $result; +} + +# forms closure around a the given PFA - no change from NFA +sub kleene { + my $result = $_[0]->clone; + + my ($newstart, $newfinal) = $result->add_states(2); + + $result->set_transition($newstart, $_, "") + for $result->get_starting; + $result->unset_starting( $result->get_starting ); + $result->set_starting($newstart); + + $result->set_transition($_, $newfinal, "") + for $result->get_accepting; + $result->unset_accepting( $result->get_accepting ); + $result->set_accepting($newfinal); + + $result->set_transition($newstart, $newfinal, ""); + $result->set_transition($newfinal, $newstart, ""); + + $result; +} + +sub as_nfa { + my $self = shift; + my $result = FLAT::NFA->new(); + # Dstates is initially populated with the start state, which + # is exactly the set of all nodes marked as a starting node + my @Dstates = [sort($self->get_starting())]; # I suppose all start states are considered 'tied' + my %DONE = (); # |- what about all accepting states? I think so... + # the main while loop that ends when @Dstates becomes exhausted + my %NEW = (); + while (@Dstates) { + my $current = pop(@Dstates); + my $currentid = join(',',@{$current}); + $DONE{$currentid}++; # mark done + foreach my $symbol ($self->alphabet(),'') { # Sigma UNION epsilon + if (LAMBDA eq $symbol) { + my @NEXT = (); + my @tmp = $self->successors([@{$current}],$symbol); + if (@tmp) { + my @pred = $self->predecessors([@tmp],LAMBDA); + if ($self->array_is_subset([@pred],[@{$current}])) { + push(@NEXT,@tmp,$self->array_complement([@{$current}],[@pred])); + @NEXT = sort($self->array_unique(@NEXT)); + my $nextid = join(',',@NEXT); + push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid})); + # make new states if none exist and track + if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)}; + if (!exists($NEW{$nextid})) {$NEW{$nextid} = $result->add_states(1) }; + $result->add_transition($NEW{$currentid},$NEW{$nextid},''); + } + } + } else { + foreach my $node (@{$current}) { + my @tmp = $self->successors([$node],$symbol); + foreach my $new (@tmp) { + my @NEXT = (); + push(@NEXT,$new,$self->array_complement([@{$current}],[$node])); + @NEXT = sort($self->array_unique(@NEXT)); + my $nextid = join(',',@NEXT); + push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid})); + # make new states if none exist and track + if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)}; + if (!exists($NEW{$nextid})) {$NEW{$nextid} = $result->add_states(1) }; + $result->add_transition($NEW{$currentid},$NEW{$nextid},$symbol); + } + } + } + } + } + $result->set_starting($NEW{join(",",sort $self->get_starting())}); + $result->set_accepting($NEW{join(",",sort $self->get_accepting())}); + return $result; + } + +1; + +__END__ + +=head1 NAME + +FLAT::PFA - Parallel finite automata + +=head1 SYNOPSIS + +A FLAT::PFA object is a finite automata whose transitions are labeled either +with characters, the empty string (epsilon), or a concurrent line of execution +(lambda). It essentially models two FSA in a non-deterministic way such that +a string is valid it puts the FSA of the shuffled languages both into a final, +or accepting, state. A PFA is an NFA, and as such exactly describes a regular +language. + +A PFA contains nodes and states. A state is made up of whatever nodes happen +to be active. There are two transition functions, nodal transitions and state +transitions. When a PFA is converted into a NFA, there is no longer a need for +nodes or nodal transitions, so they go are eliminated. PFA model state spaces +much more compactly than NFA, and an N state PFA may represent 2**N non-deterministic +states. This also means that a PFA may represent up to 2^(2^N) deterministic states. + +=head1 USAGE + +(not implemented yet) + +In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>, +FLAT::PFA objects provide the following PFA-specific methods: + +=over + +=item $pfa-E<gt>shuffle + +Shuffle construct for building a PFA out of a PRE (i.e., a regular expression with +the shuffle operator) + +=item $pfa-E<gt>as_nfa + +Converts a PFA to an NFA by enumerating all states; similar to the Subset Construction +Algorithm, it does not implement e-closure. Instead it treats epsilon transitions +normally, and joins any states resulting from a lambda (concurrent) transition +using an epsilon transition. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Regex.pm b/lib/FLAT/Regex.pm new file mode 100644 index 0000000..2c5c243 --- /dev/null +++ b/lib/FLAT/Regex.pm @@ -0,0 +1,194 @@ +package FLAT::Regex; +use base 'FLAT'; +use strict; +use Carp; + +use FLAT::Regex::Parser; +use FLAT::Regex::Op; + +my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star ]); +#### TODO: error checking in the parse + +sub _parser { $PARSER } + +sub new { + my ($pkg, $string) = @_; + my $result = $pkg->_parser->parse($string) + or croak qq[``$string'' is not a valid regular expression]; + + $pkg->_from_op( $result ); +} + +sub _from_op { + my ($proto, $op) = @_; + $proto = ref $proto || $proto; ## I really do want this + + bless [ $op ], $proto; +} + +sub op { + $_[0][0]; +} + +use overload '""' => 'as_string'; +sub as_string { + $_[0]->op->as_string(0); +} + +sub as_perl_regex { + my ($self, %opts) = @_; + + my $fmt = $opts{anchored} ? '(?:\A%s\z)' : '(?:%s)'; + return sprintf $fmt, $self->op->as_perl_regex(0); +} + +sub contains { + my ($self, $string) = @_; + $string =~ $self->as_perl_regex(anchored => 1); +} + +sub as_nfa { + $_[0]->op->as_nfa; +} + +sub as_pfa { + $_[0]->op->as_pfa; +} + +#### regular language standard interface implementation: +#### TODO: parameter checking? + +sub as_regex { + $_[0]; +} + +sub union { + my $self = $_[0]; + my $op = FLAT::Regex::op::alt->new( map { $_->as_regex->op } @_ ); + $self->_from_op($op); +} + +sub intersect { + my @dfas = map { $_->as_dfa } @_; + my $self = shift @dfas; + $self->intersect(@dfas)->as_regex; +} + +sub complement { + my $self = shift; + $self->as_dfa->complement->as_regex; +} + +sub concat { + my $self = $_[0]; + my $op = FLAT::Regex::op::concat->new( map { $_->as_regex->op } @_ ); + $self->_from_op($op); +} + +sub kleene { + my $self = shift; + my $op = FLAT::Regex::op::star->new( $self->op ); + $self->_from_op($op); +} + +sub reverse { + my $self = shift; + my $op = $self->op->reverse; + $self->_from_op($op); +} + +sub is_empty { + $_[0]->op->is_empty; +} + +sub is_finite { + $_[0]->op->is_finite; +} + +1; + +__END__ + +=head1 NAME + +FLAT::Regex - Regular expressions + +=head1 SYNOPSIS + +A FLAT::Regex object is a regular expression. + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT>, FLAT::Regex +objects provide the following regex-specific methods: + +=over + +=item FLAT::Regex-E<gt>new($string) + +Returns a regex object representing the expression given in $string. C<|> +and C<+> can both be used to denote alternation. C<*> denotes Kleene star, and +parentheses can be used for grouping. No other features or shortcut notation +is currently supported (character classes, {n,m} repetition, etc). + +Whitespaces is ignored. To specify a literal space, use C<[ ]>. This syntax +can also be used to specify atomic "characters" longer than a single +character. For example, the expression: + + [foo]abc[bar]* + +is treated as a regular expression over the symbols "a", "b", "c", "foo", +and "bar". In particular, this means that when the regular expression is +reversed, "foo" and "bar" remain the same (i.e, they do not become "oof" and +"rab"). + +The empty regular expression (epsilon) is written as C<[]>, and the null +regular expression (sometimes called phi) is specified with the C<#> +character. To specify a literal hash-character, use C<[#]>. Including +literal square bracket characters is currently not supported. + +The expression "" (or any string containing only whitespace) is not a valid +FLAT regex expression. Either C<[]> or C<#> are probably what was intended. + +=item $regex-E<gt>as_string + +Returns the string representation of the regex, in the same format as above. +It is NOT necessarily true that + + FLAT::Regex->new($string)->as_string + +is identical to $string, especially if $string contains whitespace or +redundant parentheses. + +=item $regex-E<gt>as_perl_regex + +=item $regex-E<gt>as_perl_regex(anchored => $bool); + +Returns an equivalent Perl regular expression. If the "anchored" option +is set to a true value, the regular expression will be anchored with +C<\A> and C<\z>. The default behavior is to omit the anchors. + +The Perl regex will not contain capturing parentheses. "Extended" characters +that are written as "[char]" in FLAT regexes will be written without the +square brackets in the corresponding Perl regex. So the following: + + FLAT::Regex->new("[foo][bar]*")->as_perl_regex + +will be equal to "(?:foo(?:bar)*)". + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Regex/Op.pm b/lib/FLAT/Regex/Op.pm new file mode 100644 index 0000000..76e796c --- /dev/null +++ b/lib/FLAT/Regex/Op.pm @@ -0,0 +1,282 @@ +package FLAT::Regex::Op; +use strict; + +sub new { + my $pkg = shift; + ## flatten alike operations, i.e, "a+(b+c)" into "a+b+c" + my @flat = map { UNIVERSAL::isa($_, $pkg) ? $_->members : $_ } @_; + + bless \@flat, $pkg; +} + +sub members { + my $self = shift; + wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + + +################################# +#### regex operators / components + +package FLAT::Regex::Op::atomic; +use base 'FLAT::Regex::Op'; + +sub as_string { + my $t = $_[0]->members; + + return "#" if not defined $t; + return $t =~ /^\w$/ + ? $t + : "[$t]"; +} + +sub as_perl_regex { + my $r = $_[0]->members; + + return "(?!)" if not defined $r; + + $r = quotemeta $r; + return $r =~ /^\w$/ ? $r : "(?:$r)"; +} + +sub as_nfa { + FLAT::NFA->singleton( $_[0]->members ); +} + +sub as_pfa { + FLAT::PFA->singleton( $_[0]->members ); +} + +sub from_parse { + my ($pkg, @item) = @_; + my $i = $item[1]; + + return $pkg->new("") if $i eq "[]"; + return $pkg->new(undef) if $i eq "#"; + + $i =~ s/^\[|\]$//g; + + return $pkg->new($i); +} + +sub reverse { + $_[0]; +} + +sub is_empty { + not defined $_[0]->members; +} + +sub has_nonempty_string { + my $self = shift; + defined $self->members and length $self->members; +} + +sub is_finite { + 1 +} + +############################## +package FLAT::Regex::Op::star; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s '*'" } +sub precedence { 30 } + +sub as_string { + my ($self, $prec) = @_; + my $result = $self->members->as_string($self->precedence) . "*"; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = $self->members->as_perl_regex($self->precedence) . "*"; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + $self->members->as_nfa->kleene; +} + +sub as_pfa { + my $self = shift; + $self->members->as_pfa->kleene; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( $item[1] ); +} + +sub reverse { + my $self = shift; + my $op = $self->members->reverse; + __PACKAGE__->new($op); +} + +sub is_empty { + 0 +} + +sub has_nonempty_string { + $_[0]->members->has_nonempty_string; +} + +sub is_finite { + ! $_[0]->members->has_nonempty_string; +} + + +################################ +package FLAT::Regex::Op::concat; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2..)"; } +sub precedence { 20 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = join "", + map { $_->as_perl_regex($self->precedence) } + $self->members; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + my @parts = map { $_->as_nfa } $self->members; + $parts[0]->concat( @parts[1..$#parts] ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->concat( @parts[1..$#parts] ); +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { + my $self = shift; + my @ops = CORE::reverse map { $_->reverse } $self->members; + __PACKAGE__->new(@ops); +} + +sub is_empty { + my $self = shift; + my @members = $self->members; + for (@members) { + return 1 if $_->is_empty; + } + return 0; +} + +sub has_nonempty_string { + my $self = shift; + return 0 if $self->is_empty; + + my @members = $self->members; + for (@members) { + return 1 if $_->has_nonempty_string; + } + return 0; +} + +sub is_finite { + my $self = shift; + return 1 if $self->is_empty; + + my @members = $self->members; + for (@members) { + return 0 if not $_->is_finite; + } + return 1; +} + +############################# +package FLAT::Regex::Op::alt; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2.. /[+|]/)" } +sub precedence { 10 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "+", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my ($self, $prec) = @_; + my $result = join "|", + map { $_->as_perl_regex($self->precedence) } + $self->members; + return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { + my $self = shift; + my @parts = map { $_->as_nfa } $self->members; + $parts[0]->union( @parts[1..$#parts] ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->union( @parts[1..$#parts] ); +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +sub reverse { + my $self = shift; + my @ops = map { $_->reverse } $self->members; + __PACKAGE__->new(@ops); +} + +sub is_empty { + my $self = shift; + my @members = $self->members; + for (@members) { + return 0 if not $_->is_empty; + } + return 1; +} + +sub has_nonempty_string { + my $self = shift; + my @members = $self->members; + for (@members) { + return 1 if $_->has_nonempty_string; + } + return 0; +} + +sub is_finite { + my $self = shift; + my @members = $self->members; + for (@members) { + return 0 if not $_->is_finite; + } + return 1; +} +1; diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm new file mode 100644 index 0000000..deb73f1 --- /dev/null +++ b/lib/FLAT/Regex/Parser.pm @@ -0,0 +1,82 @@ +package FLAT::Regex::Parser; +use strict; + +#### Is this one level of abstraction too far? Parser generator generators.. + +#### TODO: try YAPP, since recursive descent is SLOOOW +use Parse::RecDescent; +use FLAT::Regex::Op; + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x; + +sub new { + my $pkg = shift; + my @ops = sort { $a->{prec} <=> $b->{prec} } + map {{ + pkg => "FLAT::Regex::Op::$_", + prec => "FLAT::Regex::Op::$_"->precedence, + spec => "FLAT::Regex::Op::$_"->parse_spec, + short => $_ + }} @_; + + my $lowest = shift @ops; + my $grammar = qq! + parse: + $lowest->{short} /^\\Z/ { \$item[1] } + !; + + my $prev = $lowest; + for (@ops) { + my $spec = sprintf $prev->{spec}, $_->{short}; + + $grammar .= qq! + $prev->{short}: + $spec { $prev->{pkg}\->from_parse(\@item) } + | $_->{short} { \$item[1] } + !; + + $prev = $_; + } + + my $spec = sprintf $prev->{spec}, "atomic"; + $grammar .= qq! + $prev->{short}: + $spec { $prev->{pkg}\->from_parse(\@item) } + | atomic { \$item[1] } + + atomic: + "(" $lowest->{short} ")" { \$item[2] } + | /\$FLAT::Regex::Parser::CHAR/ + { FLAT::Regex::Op::atomic->from_parse(\@item) } + !; + + Parse::RecDescent->new($grammar); +} + +1; + + +__END__ + +original parser: + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x; + +my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die; + parse: + alt /^\Z/ { $item[1] } + alt: + concat(2.. /[+|]/) { FLAT::Regex::Op::alt->from_parse(@item) } + | concat { $item[1] } + concat: + star(2..) { FLAT::Regex::Op::concat->from_parse(@item) } + | star { $item[1] } + star : + atomic '*' { FLAT::Regex::Op::star->from_parse(@item) } + | atomic { $item[1] } + atomic: + "(" alt ")" { $item[2] } + | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) } +__EOG__ diff --git a/lib/FLAT/Regex/Transform.pm b/lib/FLAT/Regex/Transform.pm new file mode 100644 index 0000000..cd0cf56 --- /dev/null +++ b/lib/FLAT/Regex/Transform.pm @@ -0,0 +1,18 @@ +package FLAT::Regex::Transform; + +# Extends FLAT::Regex::WithExtraOps with PRegex transformations +# (i.e., reductions based on: w*v & a*b + +use base 'FLAT::Regex::WithExtraOps'; + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new(@_); + return $self; +} + +# Ideally, the transformation should be implemented as an iterator. This +# approach will be finite for shuffles with NO closed strings, but will carry on +# indefinitely for the shuffle of strings where at least one of the strings is closed + +1; diff --git a/lib/FLAT/Regex/Util.pm b/lib/FLAT/Regex/Util.pm new file mode 100644 index 0000000..516ad9f --- /dev/null +++ b/lib/FLAT/Regex/Util.pm @@ -0,0 +1,33 @@ +package FLAT::Regex::Util; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +sub get_symbol { + my @symbols = qw/0 1/; + return $symbols[rand(2)]; +} + +sub get_op { + my @ops = ('*','+','&','','','','','','',''); + return $ops[rand(10)]; +} + +sub get_random { + my $length = shift; + my $string = ''; + if (1 < $length) { + $string = get_symbol().get_op().get_random(--$length); + } else { + $string = get_symbol(); + } + return $string; +} + +sub random_pre { + my $length = ( $_[0] ? $_[0] : 32 ); + return FLAT::Regex::WithExtraOps->new(get_random($length)); +} + +1; diff --git a/lib/FLAT/Regex/WithExtraOps.pm b/lib/FLAT/Regex/WithExtraOps.pm new file mode 100644 index 0000000..b366d7c --- /dev/null +++ b/lib/FLAT/Regex/WithExtraOps.pm @@ -0,0 +1,109 @@ +package FLAT::Regex::WithExtraOps; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star negate shuffle ]); +sub _parser { $PARSER } + +sub members { + my $self = shift; + wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + +#### Precedence +# 30 ::star +# 20 ::concat +# 15 ::negate <---<< WithExtraOps +# 12 ::shuffle <---<< WithExtraOps +# 10 ::alt +# 0 ::atomic + +############################### +package FLAT::Regex::Op::negate; +use base "FLAT::Regex::Op"; +use Carp; + +sub parse_spec { "'~' %s"; } +sub precedence { 15 } # between concat and alternation + +sub as_string { + my ($self, $prec) = @_; + my $result = "~" . $self->members->as_string($self->precedence); + return $prec > $self->precedence ? "($result)" : $result; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( $item[2] ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { + my $self = shift; + my $op = $self->members->reverse; + __PACKAGE__->new($op); +} + +sub is_empty { + croak "Not implemented for negated regexes"; +} + +sub has_nonempty_string { + croak "Not implemented for negated regexes"; +} + +sub is_finite { + croak "Not implemented for negated regexes"; +} + +############################### +package FLAT::Regex::Op::shuffle; +use base 'FLAT::Regex::Op'; +use Carp; + +sub parse_spec { "%s(2.. /[&]/)" } +sub precedence { 12 } + +sub as_string { + my ($self, $prec) = @_; + my $result = join "&", + map { $_->as_string($self->precedence) } + $self->members; + return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { + my $self = shift; + croak "Not implemented for shuffled regexes"; +} + +sub from_parse { + my ($pkg, @item) = @_; + $pkg->new( @{ $item[1] } ); +} + +sub as_pfa { + my $self = shift; + my @parts = map { $_->as_pfa } $self->members; + $parts[0]->shuffle( @parts[1..$#parts] ); +} + +# Implement? +sub reverse { + my $self = shift; + croak "Not implemented for shuffled regexes"; +} + +sub is_empty { + croak "Not implemented for shuffled regexes"; +} + +sub has_nonempty_string { + croak "Not implemented for shuffled regexes"; +} + +sub is_finite { + croak "Not implemented for shuffled regexes"; +} diff --git a/lib/FLAT/Symbol.pm b/lib/FLAT/Symbol.pm new file mode 100644 index 0000000..aaadccc --- /dev/null +++ b/lib/FLAT/Symbol.pm @@ -0,0 +1,98 @@ +# +# Conceptual Experiment - not currently implemented anywhere... +# + +package FLAT::Symbol + +use strict; +use Carp; + +sub new { + my ($pkg, $string, $type) = @_; + bless { + STRING => $string, + TYPE => $type, + }, $pkg; +} + +sub as_string { + return $_[0]->{STRING}; +} + +sub get_type } + return $_[0]->{TYPE}; +} + +sub set_type { + $_[0]->{TYPE} = $_[1]; +} + +1; + +################## + +package FLAT::Symbol::Regular; +use base 'FLAT::Symbol'; + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new($_[0],'Regular'); + return $self; +} + +sub get_type { + return 'Regular'; +} + +sub set_type { + croak("Sorry, can't change type for this symbol"); +} + +1; + +################## + +package FLAT::Symbol::Special; +use base 'FLAT::Symbol'; + +sub new { + my $pkg = shift; + my $self = $pkg->SUPER::new($_[0],'Special'); + return $self; +} + +sub get_type { + return 'Special'; +} + +sub set_type { + croak("Sorry, can't change type for this symbol");} + +1; + +__END__ + +=head1 NAME + +FLAT::Symbol - Base class for transition symbol. + +=head1 SYNOPSIS + +A super class that is intended to provide a simple mechanism for storing a symbol that might be +in conflict with another symbol in string form. TYPE is used to distinguish. Currenly this neither +this, nor its current sub classes, FLAT::Symbol::Regular and FLAT::Symbol::Special, are used. + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Transition.pm b/lib/FLAT/Transition.pm new file mode 100644 index 0000000..fc385f3 --- /dev/null +++ b/lib/FLAT/Transition.pm @@ -0,0 +1,66 @@ +package FLAT::Transition; +use strict; +use Carp; + +sub new { + my ($pkg, @things) = @_; + bless { map { $_ => 1 } @things }, $pkg; +} + +sub does { + my ($self, @things) = @_; + return 1 if @things == 0; + return !! grep $self->{$_}, @things; +} + +sub add { + my ($self, @things) = @_; + @$self{@things} = (1) x @things; +} + +sub delete { + my ($self, @things) = @_; + delete $self->{$_} for @things; +} + +sub alphabet { + my $self = shift; + sort { $a cmp $b } keys %$self; +} + +sub as_string { + my $self = shift; + join ",", map { length $_ ? $_ : "epsilon" } $self->alphabet; +} + +1; + +__END__ + +=head1 NAME + +FLAT::Transition - a transition base class. + +=head1 SYNOPSIS + +Default implementation of the Transition class, used to manage transitions +from one state to others. This class is meant for internal use. + +=head1 USAGE + +used internally; + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/XFA.pm b/lib/FLAT/XFA.pm new file mode 100644 index 0000000..67e073c --- /dev/null +++ b/lib/FLAT/XFA.pm @@ -0,0 +1,3 @@ +package FLAT::XFA; + +1; |