diff options
| author | Daniel Friesel <derf@finalrewind.org> | 2017-04-03 15:04:15 +0200 | 
|---|---|---|
| committer | Daniel Friesel <derf@finalrewind.org> | 2017-04-03 15:04:15 +0200 | 
| commit | 00e57331b1c7ef2b1f402f41e1223308e0d8ce61 (patch) | |
| tree | 05e9b4223072582a5a6843de6d9845213a94f341 /lib/FLAT | |
initial commit
Diffstat (limited to 'lib/FLAT')
| -rw-r--r-- | lib/FLAT/CMD.pm | 533 | ||||
| -rw-r--r-- | lib/FLAT/CMD/AcyclicStrings.pm | 54 | ||||
| -rw-r--r-- | lib/FLAT/CMD/DFTStrings.pm | 55 | ||||
| -rw-r--r-- | lib/FLAT/DFA.pm | 557 | ||||
| -rw-r--r-- | lib/FLAT/FA.pm | 554 | ||||
| -rw-r--r-- | lib/FLAT/NFA.pm | 509 | ||||
| -rw-r--r-- | lib/FLAT/PFA.pm | 293 | ||||
| -rw-r--r-- | lib/FLAT/Regex.pm | 194 | ||||
| -rw-r--r-- | lib/FLAT/Regex/Op.pm | 282 | ||||
| -rw-r--r-- | lib/FLAT/Regex/Parser.pm | 82 | ||||
| -rw-r--r-- | lib/FLAT/Regex/Transform.pm | 18 | ||||
| -rw-r--r-- | lib/FLAT/Regex/Util.pm | 33 | ||||
| -rw-r--r-- | lib/FLAT/Regex/WithExtraOps.pm | 109 | ||||
| -rw-r--r-- | lib/FLAT/Symbol.pm | 98 | ||||
| -rw-r--r-- | lib/FLAT/Transition.pm | 66 | ||||
| -rw-r--r-- | lib/FLAT/XFA.pm | 3 | 
16 files changed, 3440 insertions, 0 deletions
| diff --git a/lib/FLAT/CMD.pm b/lib/FLAT/CMD.pm new file mode 100644 index 0000000..5bd0e2d --- /dev/null +++ b/lib/FLAT/CMD.pm @@ -0,0 +1,533 @@ +package FLAT::CMD; +use FLAT; +use FLAT::Regex; +use FLAT::NFA; +use FLAT::DFA; +use Carp; + +=head1 NAME + +CMD - Commandline interface for the Formal Language & Automata Toolkit + +=head1 SYNOPSIS + +CMD.pm is provides an interface to the C<fash> commandline utility that offers +certain features implemented in FLAT.  Consequently, this interface is also +available using the C<perl -MFLAT::CMD -e func> paradigm, but C<fash> makes +things a lot more convenient. + +=head1 USAGE + +All regular language objects in FLAT implement the following methods. +Specific regular language representations (regex, NFA, DFA) may implement +additional methods that are outlined in the repsective POD pages. + +=cut + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(compare dump dfa2gv nfa2gv pfa2gv dfa2undgv nfa2undgv pfa2undgv dfa2digraph +	     nfa2digraph pfa2digraph dfa2undirected nfa2undirected pfa2undirected random_pre random_re +             savedfa test help +	     ); + +sub AUTOLOAD { +    my($l) = $AUTOLOAD; +    $l =~ s/.*:://; +    my(%EXPORT); +    @EXPORT{@EXPORT} = ''; +    if (exists $EXPORT{$l}){ +	FLAT::CMD->$l(@_); +    } +} + +sub help { +print <<END +__________             .__    ___________.____         ___________ +\______   \ ___________|  |   \_   _____/|    |   _____\__    ___/ + |     ___// __ \_  __ \  |    |    __)  |    |   \__  \ |    |    + |    |   \  ___/|  | \/  |__  |     \   |    |___ / __ \|    |    + |____|    \___  >__|  |____/  \___  /   |_______ (____  /____|    +               \/                  \/            \/    \/     +	        +  Everything is wrt parallel regular expressions, i.e.,  +  with the addtional shuffle operator, "&".  All this  +  means is that you can use the ambersand (&) as a symbol +  in the regular expressions you submit because it will be  +  detected as an operator.That said, if you avoid using +  the "&" operator, you can forget about all that shuffle +  business. + +%perl -MFLAT::CMD -e +    "somestrings" 're1'       # creates all valid strings via acyclic path, no cycles yet +    "compare  're1','re2'"   # comares 2 regexs | see note [2]  +    "dump     're1'"         # dumps parse trees | see note[1]	    +    "dfa2gv  're1'"          # dumps graphviz digraph desc | see note[1]   +    "nfa2gv  're1'"          # dumps graphviz digraph desc | see note[1]   +    "pfa2gv  're1'"          # dumps graphviz digraph desc | see note[1]   +    "dfa2undgv  're1'"       # dumps graphviz undirected graph desc | see note[1]   +    "nfa2undgv  're1'"       # dumps graphviz undirected graph desc | see note[1]   +    "pfa2undgv  're1'"       # dumps graphviz undirected graph desc | see note[1]   +    "dfa2digraph 're1'"      # dumps directed graph without transitions +    "nfa2digraph 're1'"      # dumps directed graph without transitions +    "pfa2digraph 're1'"      # dumps directed graph without transitions +    "dfa2undirected 're1'"   # dumps undirected graph without transitions +    "nfa2undirected 're1'"   # dumps undirected graph without transitions +    "pfa2undirected 're1'"   # dumps undirected graph without transitions +     random_pre  +     random_re +    "savedfa 're1'"          # converts PRE to min dfa, then serializes to disk +    "test 'regex' 'string1'" # give a regex, reports if subsequent strings are valid +     help + +NOTES: +[1] This means you could presumably do something like the following: +    %perl -MFLAT -e command < text_file_with_1_regex_per_line.txt +                    ^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +[2] This command compares the minimal DFAs of each regular expression; +    if there exists a exact 1-1 mapping of symbols, states, and  +    transitions then the DFAs are considered equal.  This means that  +    "abc" will be equal to "def"  To make matters more confusing, "ab+ac"  +    would be equivalent to "xy+xz"; or worse yet, "z(x+y)". So to the  +    'compare' command, "ab+ac" == "xy+xz" == "z(x+y)". This however  +    does not translate into the situation where "ab+ac" will accept  +    the same LITERAL strings as "z(x+y)" because the symbols are obviously +    different.	     +		    +CREDITS: +Blockhead, CPAN.pm (for the example of how to implement these one liners),  +and #perl on irc.freenode.net for pointing out something I missed when  +trying to copy CPAN one liner majik. + +Perl FLAT and all included modules are released under the same terms as Perl +itself.  Cheers. + +SEE: +http://www.0x743.com/flat + +END +} + +# save to a dat file +sub savedfa { +    my $PRE = shift; +    # neat a better way to get input via stdin +    if (!$PRE) { +      while (<>) { +        chomp; +        $PRE = $_; +        last; +      } +    }  +    use FLAT::Regex::WithExtraOps; +    use FLAT::PFA; +    use FLAT::NFA; +    use FLAT::DFA; +    use Storable; +    # caches results, loads them in if detexted +    my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; +    store $dfa, "$PRE.dat"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')" +sub test { +  use FLAT::Regex::WithExtraOps; +  use FLAT::PFA; +  use FLAT::NFA; +  use FLAT::DFA; +  # handles multiple strings; first is considered the regex +  if (@_)  +  { my $FA = FLAT::Regex::WithExtraOps->new(shift @_)->as_pfa()->as_nfa->as_dfa();  +    foreach (@_) +    { if ($FA->is_valid_string($_)) { +        print "(+): $_\n"; +      } else { +        print "(-): $_\n"; +      }      +    }  +  } else { +    my $FA; +    while (<STDIN>) { +      chomp; +      if ($. == 1) { #<-- uses first line as regex! +        $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa->as_dfa(); +      } else { +	if ($FA->is_valid_string($_)) { +	  print "(+): $_\n"; +        } else { +	  print "(-): $_\n"; +	}       +      } +    } +  } +} + +# dumps parse tree +# Usage: +# perl -MFLAT -e "dump('re1','re2',...,'reN')" +# perl -MFLAT -e dump < list_of_regexes.dat +sub dump { +  use FLAT::Regex::WithExtraOps; +  use Data::Dumper; +  if (@_)  +  { foreach (@_) +    { my $PRE = FLAT::Regex::WithExtraOps->new($_); +      print Dumper($PRE); }}  +  else     +  { while (<STDIN>)  +     { chomp; +       my $PRE = FLAT::Regex::WithExtraOps->new($_); +       print Dumper($PRE); } +  } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "dfa2gv('a&b&c&d*e*')" +sub dfa2gv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks(); +      print $FA->as_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks(); +       print $FA->as_graphviz;}  +  } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "nfa2gv('a&b&c&d*e*')" +sub nfa2gv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +      print $FA->as_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +       print $FA->as_graphviz;}  +  } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "pfa2gv('a&b&c&d*e*')" +sub pfa2gv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::PFA; +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +      print $FA->as_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +       print $FA->as_graphviz;}  +  } +} + +#as_undirected_graphviz + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "dfa2undgv('a&b&c&d*e*')" +sub dfa2undgv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa()->as_min_dfa()->trim_sinks(); +      print $FA->as_undirected_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa()->trim_sinks(); +       print $FA->as_undirected_graphviz;}  +  } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "nfa2undgv('a&b&c&d*e*')" +sub nfa2undgv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +      print $FA->as_undirected_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +       print $FA->as_undirected_graphviz;}  +  } +} + +# dumps graphviz notation +# Usage: +# perl -MFLAT -e "pfa2undgv('a&b&c&d*e*')" +sub pfa2undgv { +  use FLAT::Regex::WithExtraOps; +  use FLAT::PFA; +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +      print $FA->as_undirected_graphviz;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +       print $FA->as_undirected_graphviz;}  +  } +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "dfa2directed('a&b&c&d*e*')" +sub dfa2digraph { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  # trims sink states from min-dfa since transitions are gone  +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();  +       print $FA->as_digraph;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); +       print $FA->as_digraph;}  +  } +  print "\n"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "nfa2directed('a&b&c&d*e*')" +sub nfa2digraph { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();  +       print $FA->as_digraph;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +       print $FA->as_digraph;}  +  } +  print "\n"; +} + +# dumps directed graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2directed('a&b&c&d*e*')" +sub pfa2digraph { +  use FLAT::Regex::WithExtraOps; +  use FLAT::PFA; +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();  +       print $FA->as_digraph;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +       print $FA->as_digraph;}  +  } +  print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "dfa2undirected('a&b&c&d*e*')" +sub dfa2undirected { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  # trims sink states from min-dfa since transitions are gone  +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks();  +       print $FA->as_undirected;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa()->as_dfa->as_min_dfa->trim_sinks(); +       print $FA->as_undirected;}  +  } +  print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "nfa2undirected('a&b&c&d*e*')" +sub nfa2undirected { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::NFA; +  use FLAT::PFA;   +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa();  +       print $FA->as_undirected;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa()->as_nfa(); +       print $FA->as_undirected;}  +  } +  print "\n"; +} + +# dumps undirected graph using Kundu notation +# Usage: +# perl -MFLAT -e "pfa2undirected('a&b&c&d*e*')" +sub pfa2undirected { +  use FLAT::Regex::WithExtraOps; +  use FLAT::PFA; +  if (@_)  +  { foreach (@_) +    { my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa();  +       print $FA->as_undirected;} } +  else     +  { while (<STDIN>)  +     { chomp; +       my $FA = FLAT::Regex::WithExtraOps->new($_)->as_pfa(); +       print $FA->as_undirected;}  +  } +  print "\n"; +} + +# compares 2 give PREs +# Usage: +# perl -MFLAT -e "compare('a','a&b&c&d*e*')" #<-- no match, btw +sub compare { +  use FLAT::Regex::WithExtraOps; +  use FLAT::DFA; +  use FLAT::PFA; +  my $PFA1 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa(); +  my $PFA2 = FLAT::Regex::WithExtraOps->new(shift)->as_pfa(); +  my $DFA1 = $PFA1->as_nfa->as_min_dfa; +  my $DFA2 = $PFA2->as_nfa->as_min_dfa; +  if ($DFA1->equals($DFA2)) { +    print "Yes\n"; +  } else { +    print "No\n"; +  } +} + +# prints random PRE +# Usage: +# perl -MFLAT -e random_pre +sub random_pre { +  my $and_chance = shift; +  # skirt around deep recursion warning annoyance +  local $SIG{__WARN__} = sub { $_[0] =~ /^Deep recursion/ or warn $_[0] }; +  srand $$; +  my %CMDLINEOPTS = (); +  # Percent chance of each operator occuring +  $CMDLINEOPTS{LENGTH} = 32; +  $CMDLINEOPTS{OR} = 6; +  $CMDLINEOPTS{STAR} = 10; +  $CMDLINEOPTS{OPEN} = 5; +  $CMDLINEOPTS{CLOSE} = 0; +  $CMDLINEOPTS{n} = 1; +  $CMDLINEOPTS{AND} = 10; #<-- default     +  $CMDLINEOPTS{AND} = $and_chance if ($and_chance == 0); #<-- to make it just an re (no shuffle) +   + +  my $getRandomChar = sub { +    my $ch = ''; +    # Get a random character between 0 and 127. +    do { +      $ch = int(rand 2); +    } while ($ch !~ m/[a-zA-Z0-9]/);   +    return $ch; +  }; + +  my $getRandomRE = sub { +    my $str = ''; +    my @closeparens = (); +    for (1..$CMDLINEOPTS{LENGTH}) { +      $str .= $getRandomChar->();   +      # % chance of an "or" +      if (int(rand 100) < $CMDLINEOPTS{OR}) { +	$str .= "|1"; +      } elsif (int(rand 100) < $CMDLINEOPTS{AND}) { +	$str .= "&0"; +      } elsif (int(rand 100) < $CMDLINEOPTS{STAR}) { +	$str .= "*1";      +      } elsif (int(rand 100) < $CMDLINEOPTS{OPEN}) { +	$str .= "("; +	push(@closeparens,'0101)'); +      } elsif (int(rand 100) < $CMDLINEOPTS{CLOSE} && @closeparens) { +	$str .= pop(@closeparens); +      } +    } +    # empty out @closeparens if there are still some left +    if (@closeparens) { +      $str .= join('',@closeparens);   +    } +    return $str; +  }; + +  for (1..$CMDLINEOPTS{n}) { +    print $getRandomRE->(),"\n";   +  }  +} + +# prints random RE (no & operator) +# Usage: +# perl -MFLAT -e random_re +sub random_re { +  shift->random_pre(0); +} + +1; + +__END__ + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. + diff --git a/lib/FLAT/CMD/AcyclicStrings.pm b/lib/FLAT/CMD/AcyclicStrings.pm new file mode 100644 index 0000000..ebc8840 --- /dev/null +++ b/lib/FLAT/CMD/AcyclicStrings.pm @@ -0,0 +1,54 @@ +# all strings available via acyclic path from the DFA start state to any all of the final states + +package FLAT::CMD::AcyclicStrings; +use base 'FLAT::CMD'; +use FLAT; +use FLAT::Regex::WithExtraOps; +use FLAT::PFA; +use FLAT::NFA; +use FLAT::DFA; +use Storable; +use Carp; + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(as_strings); + +sub AUTOLOAD { +    my($l) = $AUTOLOAD; +    $l =~ s/.*:://; +    my(%EXPORT); +    @EXPORT{@EXPORT} = ''; +    if (exists $EXPORT{$l}){ +	FLAT::CMD->$l(@_); +    } +} + +use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa); +# acyclic - no cycles +sub as_strings { +    my $PRE = shift; +    # neat a better way to get input via stdin +    if (!$PRE) { +      while (<>) { +        chomp; +        $PRE = $_; +        last; +      } +    }  +    # caches results, loads them in if detexted +    my $RE = FLAT::Regex::WithExtraOps->new($PRE); +    printf("%s\n",$RE->as_string()); +    if (!-e "$PRE.dat") { +      $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; +      #store $dfa, "$PRE.dat"; +    } else { +      print STDERR "$PRE.dat found.."; +      $dfa = retrieve "$PRE.dat"; +    } +    $dfa->as_acyclic_strings(); +} + +1; diff --git a/lib/FLAT/CMD/DFTStrings.pm b/lib/FLAT/CMD/DFTStrings.pm new file mode 100644 index 0000000..9dc5a59 --- /dev/null +++ b/lib/FLAT/CMD/DFTStrings.pm @@ -0,0 +1,55 @@ +# all strings available via depth first traversal, including back edges that happen to land on an accepting state + +package FLAT::CMD::DFTStrings; +use base 'FLAT::CMD'; +use FLAT; +use FLAT::Regex::WithExtraOps; +use FLAT::PFA; +use FLAT::NFA; +use FLAT::DFA; +use Storable; +use Carp; + +# Support for perl one liners - like what CPAN.pm uses #<- should move all to another file +use base 'Exporter'; #instead of: use Exporter (); @ISA = 'Exporter'; +use vars qw(@EXPORT $AUTOLOAD); + +@EXPORT = qw(as_strings); + +sub AUTOLOAD { +    my($l) = $AUTOLOAD; +    $l =~ s/.*:://; +    my(%EXPORT); +    @EXPORT{@EXPORT} = ''; +    if (exists $EXPORT{$l}){ +	FLAT::CMD->$l(@_); +    } +} + +use vars qw(%nodes %dflabel %backtracked %low $lastDFLabel @string $dfa); +# acyclic - no cycles +sub as_strings { +    my $PRE = shift; +    # neat a better way to get input via stdin +    if (!$PRE) { +      while (<>) { +        chomp; +        $PRE = $_; +        last; +      } +    }  +    # caches results, loads them in if detexted +    my $RE = FLAT::Regex::WithExtraOps->new($PRE); +    printf("%s\n",$RE->as_string()); +    if (!-e "$PRE.dat") { +      $dfa = $RE->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks; +      #store $dfa, "$PRE.dat"; +    } else { +      print STDERR "$PRE.dat found.."; +      $dfa = retrieve "$PRE.dat"; +    } + +    $dfa->as_dft_strings(shift); +} + +1; diff --git a/lib/FLAT/DFA.pm b/lib/FLAT/DFA.pm new file mode 100644 index 0000000..f96c9fb --- /dev/null +++ b/lib/FLAT/DFA.pm @@ -0,0 +1,557 @@ +package FLAT::DFA; + +use strict; +use base 'FLAT::NFA'; +use Storable qw(dclone); +use Carp; +$|++; + +sub set_starting { +    my $self = shift; +    $self->SUPER::set_starting(@_); +     +    my $num = () = $self->get_starting; +    confess "DFA must have exactly one starting state" +        if $num != 1; +} + +sub complement { +    my $self = $_[0]->clone; +     +    for my $s ($self->get_states) { +        $self->is_accepting($s) +            ? $self->unset_accepting($s) +            : $self->set_accepting($s); +    } +     +    return $self; +} + +sub _TUPLE_ID { join "\0", @_ } +sub _uniq { my %seen; grep { !$seen{$_}++ } @_; } + +## this method still needs more work.. +sub intersect { +    my @dfas = map { $_->as_dfa } @_; +     +    my $return = FLAT::DFA->new; +    my %newstates; +    my @alpha = _uniq( map { $_->alphabet } @dfas ); +     +    $_->_extend_alphabet(@alpha) for @dfas; + +    my @start = map { $_->get_starting } @dfas; +    my $start = $newstates{ _TUPLE_ID(@start) } = $return->add_states(1); +    $return->set_starting($start); +    $return->set_accepting($start) +        if ! grep { ! $dfas[$_]->is_accepting( $start[$_] ) } 0 .. $#dfas; + +    my @queue = (\@start); +    while (@queue) { +        my @tuple = @{ shift @queue }; + +        for my $char (@alpha) { +            my @next = map { $dfas[$_]->successors( $tuple[$_], $char ) } +                        0 .. $#dfas; +             +            #warn "[@tuple] --> [@next] via $char\n"; +             +            if (not exists $newstates{ _TUPLE_ID(@next) }) { +                my $s = $newstates{ _TUPLE_ID(@next) } = $return->add_states(1); +                $return->set_accepting($s) +                    if ! grep { ! $dfas[$_]->is_accepting( $next[$_] ) } 0 .. $#dfas; +                push @queue, \@next; +            } +             +            $return->add_transition( $newstates{ _TUPLE_ID(@tuple) }, +                                     $newstates{ _TUPLE_ID(@next) }, +                                     $char ); +        }             +    } + +    return $return;     +} + +# this is meant to enforce 1 starting state for a DFA, but it is getting us into trouble +# when a DFA object calls unset_starting +sub unset_starting { +    my $self = shift;     +    $self->SUPER::unset_starting(@_);     +    my $num = () = $self->unset_starting; +    croak "DFA must have exactly one starting state" +        if $num != 1; +} + +#### transformations + +sub trim_sinks { +  my $self = shift; +  my $result = $self->clone(); +  foreach my $state ($self->array_complement([$self->get_states()],[$self->get_accepting()])) { +    my @ret = $self->successors($state,[$self->alphabet]); +    if (@ret) { +      if ($ret[0] == $state) { +        $result->delete_states($state) if ($result->is_state($state));     +      } +    } +  } +  return $result; +} + +sub as_min_dfa { + +    my $self     = shift()->clone; +    my $N        = $self->num_states; +    my @alphabet = $self->alphabet; + +    my ($start)  = $self->get_starting; +    my %final    = map { $_ => 1 } $self->get_accepting; + +    my @equiv = map [ (0) x ($_+1), (1) x ($N-$_-1) ], 0 .. $N-1; + +    while (1) { +        my $changed = 0; +        for my $s1 (0 .. $N-1) { +        for my $s2 (grep { $equiv[$s1][$_] } 0 .. $N-1) { +             +            if ( 1 == grep defined, @final{$s1, $s2} ) { +                $changed = 1; +                $equiv[$s1][$s2] = 0; +                next; +            } +             +            for my $char (@alphabet) { +                my @t = sort { $a <=> $b } $self->successors([$s1,$s2], $char); +                next if @t == 1; +                 +                if (not $equiv[ $t[0] ][ $t[1] ]) { +                    $changed = 1; +                    $equiv[$s1][$s2] = 0; +                } +            } +        }} +         +        last if !$changed; +    } +    my $result = (ref $self)->new; +    my %newstate; +    my @classes; +    for my $s (0 .. $N-1) { +        next if exists $newstate{$s}; +         +        my @c = ( $s, grep { $equiv[$s][$_] } 0 .. $N-1 ); +        push @classes, \@c; + +        @newstate{@c} = ( $result->add_states(1) ) x @c; +    } + +    for my $c (@classes) { +        my $s = $c->[0]; +        for my $char (@alphabet) { +            my ($next) = $self->successors($s, $char); +            $result->add_transition( $newstate{$s}, $newstate{$next}, $char ); +        } +    } +     +    $result->set_starting( $newstate{$start} ); +    $result->set_accepting( $newstate{$_} ) +        for $self->get_accepting; +     +    $result; + +} + +# the validity of a given string <-- executes symbols over DFA +# if there is not transition for given state and symbol, it fails immediately +# if the current state we're in is not final when symbols are exhausted, then it fails + +sub is_valid_string { +  my $self = shift; +  my $string = shift; +  chomp $string; +  my $OK = undef; +  my @stack = split('',$string); +  # this is confusing all funcs return arrays +  my @current = $self->get_starting(); +  my $current = pop @current; +  foreach (@stack) { +    my @next = $self->successors($current,$_);     +    if (!@next) { +      return $OK; #<--returns undef bc no transition found +    } +    $current = $next[0]; +  } +  $OK++ if ($self->is_accepting($current)); +  return $OK; +} + +# +# Experimental!! +# + +# DFT stuff in preparation for DFA pump stuff; +sub as_node_list { +    my $self = shift; +    my %node = (); +    for my $s1 ($self->get_states) { +      $node{$s1} = {}; # initialize +      for my $s2 ($self->get_states) { +         my $t = $self->get_transition($s1, $s2); +         if (defined $t) { +           # array of symbols that $s1 will go to $s2 on... +	   push(@{$node{$s1}{$s2}},split(',',$t->as_string));  +         } +      } +    } +  return %node; +} + +sub as_acyclic_strings { +    my $self = shift; +    my %dflabel       = (); # lookup table for dflable +    my %backtracked   = (); # lookup table for backtracked edges +    my $lastDFLabel   = 0; +    my @string        = (); +    my %nodes         = $self->as_node_list(); +    # output format is the actual PRE followed by all found strings +    $self->acyclic($self->get_starting(),\%dflabel,$lastDFLabel,\%nodes,\@string); +} + +sub acyclic { +  my $self = shift; +  my $startNode = shift; +  my $dflabel_ref = shift; +  my $lastDFLabel = shift; +  my $nodes = shift; +  my $string = shift; +  # tree edge detection +  if (!exists($dflabel_ref->{$startNode})) { +    $dflabel_ref->{$startNode} = ++$lastDFLabel;  # the order inwhich this link was explored +    foreach my $adjacent (keys(%{$nodes->{$startNode}})) { +      if (!exists($dflabel_ref->{$adjacent})) {      # initial tree edge +        foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) { +	  push(@{$string},$symbol); +          $self->acyclic($adjacent,\%{$dflabel_ref},$lastDFLabel,\%{$nodes},\@{$string}); +	  if ($self->array_is_subset([$adjacent],[$self->get_accepting()])) { #< proof of concept +            printf("%s\n",join('',@{$string})); +	  } +	  pop(@{$string}); +        } +      } +    }  +  } +  # remove startNode entry to facilitate acyclic path determination +  delete($dflabel_ref->{$startNode}); +  #$lastDFLabel--; +  return;      +}; + +sub as_dft_strings { +  my $self = shift; +  my $depth = 1; +  $depth = shift if (1 < $_[0]); +  my %dflabel        = (); # scoped lookup table for dflable +  my %nodes          = $self->as_node_list(); +  foreach (keys(%nodes)) { +    $dflabel{$_} = []; # initialize container (array) for multiple dflables for each node +  } +  my $lastDFLabel    =  0; +  my @string         = (); +  $self->dft($self->get_starting(),[$self->get_accepting()],\%dflabel,$lastDFLabel,\%nodes,\@string,$depth);  +} + +sub dft { +  my $self = shift; +  my $startNode    = shift; +  my $goals_ref    = shift; +  my $dflabel_ref  = shift; +  my $lastDFLabel  = shift; +  my $nodes        = shift; +  my $string       = shift; +  my $DEPTH        = shift; +  # add start node to path +  my $c1 = @{$dflabel_ref->{$startNode}}; # get number of elements +  if ($DEPTH >= $c1) {   +    push(@{$dflabel_ref->{$startNode}},++$lastDFLabel); +    foreach my $adjacent (keys(%{$nodes->{$startNode}})) { +      my $c2 = @{$dflabel_ref->{$adjacent}}; +      if ($DEPTH > $c2) {   # "initial" tree edge +        foreach my $symbol (@{$nodes->{$startNode}{$adjacent}}) { +	  push(@{$string},$symbol); +	  $self->dft($adjacent,[@{$goals_ref}],$dflabel_ref,$lastDFLabel,$nodes,[@{$string}],$DEPTH); +	  # assumes some base path found +          if ($self->array_is_subset([$adjacent],[@{$goals_ref}])) {  +            printf("%s\n",join('',@{$string}));     +  	  }  +          pop(@{$string});  +        }  +      } +    } # remove startNode entry to facilitate acyclic path determination +    pop(@{$dflabel_ref->{$startNode}}); +    $lastDFLabel--; +  }     +}; + +# +# String gen using iterators (still experimental) +# + +sub get_acyclic_sub { +  my $self = shift; +  my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel) = @_; +  my @ret = (); +  foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) { +    $lastDFLabel++; +    if (!exists($dflabel_ref->{$adjacent})) { +      $dflabel_ref->{$adjacent} = $lastDFLabel; +      foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) {  +        push(@{$string_ref},$symbol); +      	my $string_clone = dclone($string_ref); +        my $dflabel_clone = dclone($dflabel_ref); +        push(@ret,sub { return $self->get_acyclic_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel); });  +        pop @{$string_ref}; +      } +    }  +  +  } +  return {substack=>[@ret], +          lastDFLabel=>$lastDFLabel, +          string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)}; +} +sub init_acyclic_iterator { +  my $self = shift; +  my %dflabel = ();  +  my @string  = ();  +  my $lastDFLabel = 0;  +  my %nodelist = $self->as_node_list();  +  my @accepting = $self->get_accepting(); +  # initialize +  my @substack = (); +  my $r = $self->get_acyclic_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel); +  push(@substack,@{$r->{substack}}); +  return sub { +    while (1) { +      if (!@substack) { +        return undef; +      } +      my $s = pop @substack; +      my $r = $s->(); +      push(@substack,@{$r->{substack}});  +      if ($r->{string}) { +       return $r->{string}; +      } +    } +  } +} + +sub new_acyclic_string_generator { +  my $self = shift; +  return $self->init_acyclic_iterator(); +} + +sub get_deepdft_sub { +  my $self = shift; +  my ($start,$nodelist_ref,$dflabel_ref,$string_ref,$accepting_ref,$lastDFLabel,$max) = @_; +  my @ret = (); +  my $c1 = @{$dflabel_ref->{$start}}; +  if ($c1 < $max) { +    push(@{$dflabel_ref->{$start}},++$lastDFLabel); +    foreach my $adjacent (keys(%{$nodelist_ref->{$start}})) { +      my $c2 = @{$dflabel_ref->{$adjacent}}; +      if ($c2 < $max) { +        foreach my $symbol (@{$nodelist_ref->{$start}{$adjacent}}) {  +          push(@{$string_ref},$symbol); +          my $string_clone = dclone($string_ref); +          my $dflabel_clone = dclone($dflabel_ref); +          push(@ret,sub { return $self->get_deepdft_sub($adjacent,$nodelist_ref,$dflabel_clone,$string_clone,$accepting_ref,$lastDFLabel,$max); });  +          pop @{$string_ref}; +        } +      } +    } +  } +  return {substack=>[@ret], lastDFLabel=>$lastDFLabel, string => ($self->array_is_subset([$start],[@{$accepting_ref}]) ? join('',@{$string_ref}) : undef)}; +} +  +sub init_deepdft_iterator { +  my $self = shift; +  my $MAXLEVEL = shift; +  my %dflabel = ();  +  my @string  = ();  +  my $lastDFLabel = 0;  +  my %nodelist = $self->as_node_list();  +  foreach my $node (keys(%nodelist)) { +    $dflabel{$node} = []; # initializes anonymous arrays for all nodes +  } +  my @accepting = $self->get_accepting(); +  # initialize +  my @substack = (); +  my $r = $self->get_deepdft_sub($self->get_starting(),\%nodelist,\%dflabel,\@string,\@accepting,$lastDFLabel,$MAXLEVEL); +  push(@substack,@{$r->{substack}}); +  return sub { +    while (1) { +      if (!@substack) { +        return undef; +      } +      my $s = pop @substack; +      my $r = $s->(); +      push(@substack,@{$r->{substack}});  +      if ($r->{string}) { +       return $r->{string}; +      } +    } +  } +} + +sub new_deepdft_string_generator { +  my $self = shift; +  my $MAXLEVEL = (@_ ? shift : 1); +  return $self->init_deepdft_iterator($MAXLEVEL); +} + +1; + +__END__ + +=head1 NAME + +FLAT::DFA - Deterministic finite automata + +=head1 SYNOPSIS + +A FLAT::DFA object is a finite automata whose transitions are labeled +with single characters. Furthermore, each state has exactly one outgoing +transition for each available label/character.  + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>,  +FLAT::DFA objects provide the following DFA-specific methods: + +=over + +=item $dfa-E<gt>unset_starting + +Because a DFA, by definition, must have only ONE starting state, this allows one to unset +the current start state so that a new one may be set. + +=item $dfa-E<gt>trim_sinks + +This method returns a FLAT::DFA (though in theory an NFA) that is lacking a transition for  +all symbols from all states.  This method eliminates all transitions from all states that lead +to a sink state; it also eliminates the sink state. + +This has no affect on testing if a string is valid using C<FLAT::DFA::is_valid_string>,  +discussed below. + +=item $dfa-E<gt>as_min_dfa + +This method minimizes the number of states and transitions in the given DFA. The modifies +the current/calling DFA object. + +=item $dfa-E<gt>is_valid_string($string) + +This method tests if the given string is accepted by the DFA. + +=item $dfa-E<gt>as_node_list + +This method returns a node list in the form of a hash. This node list may be viewed as a  +pure digraph, and is lacking in state names and transition symbols. + +=item $dfa-E<gt>as_acyclic_strings + +The method is B<deprecated>, and it is suggested that one not use it.  It returns all  +valid strings accepted by the DFA by exploring all acyclic paths that go from the start +state and end in an accepting state.  The issue with this method is that it finds and +returns all strings at once.  The iterator described below is much more ideal for actual +use in an application. + +=item $dfa-E<gt>as_dft_strings($depth) + +The method is B<deprecated>, and it is suggested that one not use it.  It returns all  +valid strings accepted by the DFA using a depth first traversal.  A valid string is formed +when the traversal detects an accepting state, whether it is a terminal node or a node reached +via a back edge.  The issue with this method is that it finds and returns all strings at once.   +The iterator described below is much more ideal for actual use in an application. + +The argument, C<$depth> specifies how many times the traversal may actually pass through +a previously visited node.  It is therefore possible to safely explore DFAs that accept +infinite languages. + +=item $dfa-E<gt>new_acyclic_string_generator + +This allows one to initialize an iterator that returns a valid string on each successive +call of the sub-ref that is returned. It returns all valid strings accepted by the DFA by  +exploring all acyclic paths that go from the start state and end in an accepting state. + +Example: + + #!/usr/bin/env perl + use strict;  + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + use FLAT::Regex::WithExtraOps;  + + my $PRE = "abc&(def)*"; + my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;  + my $next = $dfa->new_acyclic_string_generator;  + print "PRE: $PRE\n"; + print "Acyclic:\n"; + while (my $string = $next->()) { +   print "  $string\n"; + } + +=item $dfa-E<gt>new_deepdft_string_generator($depth) + +This allows one to initialize an iterator that returns a valid string on each successive +call of the sub-ref that is returned. It returns all valid strings accepted by the DFA using a  +depth first traversal.  A valid string is formed when the traversal detects an accepting state,  +whether it is a terminal node or a node reached via a back edge. + +The argument, C<$depth> specifies how many times the traversal may actually pass through +a previously visited node.  It is therefore possible to safely explore DFAs that accept +infinite languages. + + #!/usr/bin/env perl + use strict;  + use FLAT::DFA; + use FLAT::NFA; + use FLAT::PFA; + use FLAT::Regex::WithExtraOps;  + + my $PRE = "abc&(def)*"; + my $dfa = FLAT::Regex::WithExtraOps->new($PRE)->as_pfa->as_nfa->as_dfa->as_min_dfa->trim_sinks;  + my $next = $dfa->new_deepdft_string_generator(); + print "Deep DFT (default):\n"; + for (1..10) { +  while (my $string = $next->()) { +    print "  $string\n"; +    last; +   } + } + + $next = $dfa->new_deepdft_string_generator(5); + print "Deep DFT (5):\n"; + for (1..10) { +   while (my $string = $next->()) { +     print "  $string\n"; +     last; +   } + } + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/FA.pm b/lib/FLAT/FA.pm new file mode 100644 index 0000000..dd77f50 --- /dev/null +++ b/lib/FLAT/FA.pm @@ -0,0 +1,554 @@ +package FLAT::FA; + +use strict; +use base 'FLAT'; +use Carp; + +use FLAT::Transition; + +=head1 NAME + +FLAT::FA - Base class for regular finite automata + +=head1 SYNOPSIS + +A FLAT::FA object is a collection of states and transitions. Each state +may be labeled as starting or accepting. Each transition between states +is labeled with a transition object. + +=head1 USAGE + +FLAT::FA is a superclass that is not intended to be used directly. However, +it does provide the following methods: + +=cut + +sub new { +    my $pkg = shift; +    bless { +        STATES => [], +        TRANS  => [], +        ALPHA  => {} +    }, $pkg; +} + +sub get_states { +    my $self = shift; +    return 0 .. ($self->num_states - 1); +} + +sub num_states { +    my $self = shift; +    return scalar @{ $self->{STATES} }; +} + +sub is_state { +    my ($self, $state) = @_; +    exists $self->{STATES}->[$state]; +} + +sub _assert_states { +    my ($self, @states) = @_; +    for (@states) { +        croak "'$_' is not a state" if not $self->is_state($_); +    } +} +sub _assert_non_states { +    my ($self, @states) = @_; +    for (@states) { +        croak "There is already a state called '$_'" if $self->is_state($_);     +    } +} + +sub delete_states { +    my ($self, @states) = @_; +     +    $self->_assert_states(@states); + +    for my $s ( sort { $b <=> $a } @states ) { +        $self->_decr_alphabet($_) +            for @{ splice @{ $self->{TRANS} }, $s, 1  }; + +        $self->_decr_alphabet( splice @$_, $s, 1 ) +            for @{ $self->{TRANS} }; +             +        splice @{ $self->{STATES} }, $s, 1; +    } +} + +sub add_states { +    my ($self, $num) = @_; +    my $id = $self->num_states; +     +    for my $s ( $id .. ($id+$num-1) ) { +        push @$_, undef for @{ $self->{TRANS} }; +        push @{ $self->{TRANS} }, [ (undef) x ($s+1) ]; +        push @{ $self->{STATES} }, { +            starting => 0, +            accepting => 0 +        }; +    } +     +    return wantarray ? ($id .. ($id+$num-1)) +                     : $id+$num-1; +} + +############## + +sub is_starting { +    my ($self, $state) = @_; +    $self->_assert_states($state); +    return $self->{STATES}[$state]{starting}; +} +sub set_starting { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{starting} = 1 for @states; +} +sub unset_starting { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{starting} = 0 for @states; +} +sub get_starting { +    my $self = shift; +    return grep { $self->is_starting($_) } $self->get_states; +} + +############## + +sub is_accepting { +    my ($self, $state) = @_; +    $self->_assert_states($state); +    return $self->{STATES}[$state]{accepting}; +} +sub set_accepting { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{accepting} = 1 for @states; +} +sub unset_accepting { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{accepting} = 0 for @states; +} +sub get_accepting { +    my $self = shift; +    return grep { $self->is_accepting($_) } $self->get_states; +} + +############### + +sub _decr_alphabet { +    my ($self, $t) = @_; +    return if not defined $t; +    for ($t->alphabet) { +        delete $self->{ALPHA}{$_} if not --$self->{ALPHA}{$_}; +    } +} +sub _incr_alphabet { +    my ($self, $t) = @_; +    return if not defined $t; +    $self->{ALPHA}{$_}++ for $t->alphabet; +} + +sub set_transition { +    my ($self, $state1, $state2, @label) = @_; +    $self->remove_transition($state1, $state2); + +    @label = grep defined, @label; +    return if not @label; +     +    my $t = $self->{TRANS_CLASS}->new(@label); +    $self->_incr_alphabet($t); + +    $self->{TRANS}[$state1][$state2] = $t; +} + +sub add_transition { +    my ($self, $state1, $state2, @label) = @_; + +    @label = grep defined, @label; +    return if not @label; + +    my $t = $self->get_transition($state1, $state2); +    $self->_decr_alphabet($t); +     +    if (!$t) { +        $t = $self->{TRANS}[$state1][$state2] = $self->{TRANS_CLASS}->new; +    } +     +    $t->add(@label); +    $self->_incr_alphabet($t); +} + +sub get_transition { +    my ($self, $state1, $state2) = @_; +    $self->_assert_states($state1, $state2); +     +    $self->{TRANS}[$state1][$state2]; +} + +sub remove_transition { +    my ($self, $state1, $state2) = @_; + +    $self->_decr_alphabet( $self->{TRANS}[$state1][$state2] ); +    $self->{TRANS}[$state1][$state2] = undef; +} + +# given a state and a symbol, it tells you  +# what the next state(s) are; do get successors  +# for find the successors for a set of symbols,  +# use array refs.  For example: +# @NEXT=$self->successors([@nodes],[@symbols]); +sub successors { +    my ($self, $state, $symb) = @_; +     +    my @states = ref $state eq 'ARRAY' ? @$state : ($state); +    my @symbs  = defined $symb +                  ? (ref $symb  eq 'ARRAY' ? @$symb  : ($symb)) +                  : (); +         +    $self->_assert_states(@states); +     +    my %succ; +    for my $s (@states) { +        $succ{$_}++ +            for grep { my $t = $self->{TRANS}[$s][$_]; +                       defined $t && (@symbs ? $t->does(@symbs) : 1) } $self->get_states; +    } +     +    return keys %succ; +} + +sub predecessors { +    my $self = shift; +    $self->clone->reverse->successors(@_); +} + +# reverse  - no change from NFA +sub reverse { +    my $self = $_[0]->clone; +    $self->_transpose; +     +    my @start = $self->get_starting; +    my @final = $self->get_accepting; +     +    $self->unset_accepting( $self->get_states ); +    $self->unset_starting( $self->get_states ); +     +    $self->set_accepting( @start ); +    $self->set_starting( @final ); +     +    $self; +} + +# get an array of all symbols +sub alphabet { +    my $self = shift; +    grep length, keys %{ $self->{ALPHA} }; +} + +# give an array of symbols, return the symbols that +# are in the alphabet +#sub is_in_alphabet { +#  my $self = shift; +#  my $ +#} + +############ +sub prune { +    my $self = shift; +     +    my @queue = $self->get_starting; +    my %seen  = map { $_ => 1 } @queue; +     +    while (@queue) { +        @queue = grep { ! $seen{$_}++ } $self->successors(\@queue); +    } +     +    my @useless = grep { !$seen{$_} } $self->get_states; +    $self->delete_states(@useless); +     +    return @useless; +} + + +############ + +use Storable 'dclone'; +sub clone { +    dclone( $_[0] ); +} + +sub _transpose { +    my $self = shift; +    my $N = $self->num_states - 1; +     +    $self->{TRANS} = [ +        map { +            my $row = $_;  +            [ map { $_->[$row] } @{$self->{TRANS}} ] +        } 0 .. $N +    ]; +} + +# tests to see if set1 is a subset of set2 +sub array_is_subset { +  my $self = shift; +  my $set1 = shift; +  my $set2 = shift; +  my $ok = 1; +  my %setcount = (); +  foreach ($self->array_unique(@{$set1}),$self->array_unique(@{$set2})) { +    $setcount{$_}++; +  } +  foreach ($self->array_unique(@{$set1})) { +    if ($setcount{$_} != 2) { +      $ok = 0; +      last; +    } +  } +  return $ok; +} + +sub array_unique { +  my $self = shift; +  my %ret = (); +  foreach (@_) { +    $ret{$_}++; +  } +  return keys(%ret); +} + +sub  array_complement { +  my $self = shift; +  my $set1 = shift; +  my $set2 = shift; +  my @ret = (); +  # convert set1 to a hash +  my %set1hash = map {$_ => 1} @{$set1}; +  # iterate of set2 and test if $set1 +  foreach (@{$set2}) { +    if (!defined $set1hash{$_}) { +      push(@ret,$_); +    } +  } +  ## Now do the same using $set2 +  # convert set2 to a hash +  my %set2hash = map {$_ => 1} @{$set2}; +  # iterate of set1 and test if $set1 +  foreach (@{$set1}) { +    if (!defined $set2hash{$_}) { +      push(@ret,$_); +    } +  } +  # now @ret contains all items in $set1 not in $set 2 and all +  # items in $set2 not in $set1 +  return @ret;   +} + +# returns all items that 2 arrays have in common +sub array_intersect { +  my $self = shift; +  my $set1 = shift; +  my $set2 = shift; +  my %setcount = (); +  my @ret = (); +  foreach ($self->array_unique(@{$set1})) { +    $setcount{$_}++; +  } +  foreach ($self->array_unique(@{$set2})) { +    $setcount{$_}++; +    push(@ret,$_) if ($setcount{$_} > 1);  +  } +  return @ret; +} + +# given a set of symbols, returns only the valid ones +sub get_valid_symbols { +  my $self = shift; +  my $symbols = shift; +  return $self->array_intersect([$self->alphabet()],[@{$symbols}]) +} + +## add an FA's states & transitions to this FA (as disjoint union) +sub _swallow { +    my ($self, $other) = @_; +    my $N1 = $self->num_states; +    my $N2 = $other->num_states; +     +    push @$_, (undef) x $N2 +        for @{ $self->{TRANS} }; + +    push @{ $self->{TRANS} }, [ (undef) x $N1, @{ clone $_ } ] +        for @{ $other->{TRANS} }; + +    push @{ $self->{STATES} }, @{ clone $other->{STATES} }; +     +    $self->{ALPHA}{$_} += $other->{ALPHA}{$_} +        for keys %{ $other->{ALPHA} }; +     +    return map { $_ + $N1 } $other->get_states; +} + +1; + +__END__ + + +=head2 Manipulation & Inspection Of States + +=over + +=item $fa-E<gt>get_states + +Returns a list of all the state "names" in $fa. + +=item $fa-E<gt>num_states + +Returns the number of states in $fa. + +=item $fa-E<gt>is_state($state_id) + +Returns a boolean indicating whether $state_id is a recognized state "name." + +=item $fa-E<gt>delete_states(@states) + +Deletes the states given in @states and their corresponding transitions. The +remaining states in the FA may be "renamed" (renumbered)! Return value not +used. + +=item $fa-E<gt>add_states($num) + +Adds $num states to $fa, and returns a list of the new state "names." + +=item $fa-E<gt>get_starting + +=item $fa-E<gt>get_accepting + +Returns a list of all the states which are labeled as starting/accepting, +respectively. + +=item $fa-E<gt>set_accepting(@states) + +=item $fa-E<gt>unset_accepting(@states) + +=item $fa-E<gt>set_starting(@states) + +=item $fa-E<gt>unset_starting(@states) + +Sets/unsets a list of states as being labeled starting/accepting, +respectively. + +=item $fa-E<gt>is_starting($state) + +=item $fa-E<gt>is_accepting($state) + +Returns a boolean indicating whether $state is labeled as starting/accepting, +respectively. + +=item $fa-E<gt>prune + +Deletes the states which are not reachable (via zero or more transitions) +from starting states. Returns a list of the "names" of states that were +deleted. + +=back + +=head2 Manipulation & Inspection Of Transitions + +Each transition between states is a transition object, which knows how +to organize several "labels." Think of this as the mechanism by which +multiple arrows in the state diagram between the same states are collapsed +to a single arrow. This interface is abstracted away into the following +public methods: + +=over + +=item $fa-E<gt>set_transition($state1, $state2, @labels) + +Resets the transition between $state1 and $state2 to a transition +initialized using data @labels. If @labels is omitted or contains +only undefined elements, then the call is equivalent to C<remove_transition>. + +=item $fa-E<gt>add_transition($state1, $state2, @labels) + +Adds @labels to the transition between $state1 and $state2. + +=item $fa-E<gt>get_transition($state1, $state2) + +Returns the transition object stored between $state1 and $state2, or +undef if there is no transition. + +=item $fa-E<gt>remove_transition($state1, $state2) + +Removes the transition object between $state1 and $state2. + +=item $fa-E<gt>successors(\@states) + +=item $fa-E<gt>successors($state) + +=item $fa-E<gt>successors(\@states, $label) + +=item $fa-E<gt>successors($state, $label) + +=item $fa-E<gt>successors(\@states, \@labels) + +=item $fa-E<gt>successors($state, \@labels) + +Given a state/set of states, and one or more labels, returns a list of +the states (without duplicates) reachable from the states via a single +transition having any of the given labels. If no labels are given, returns +the states reachable by any (single) transition. + +Note that this method makes no distinction for epsilon transitions, these +are only special in FLAT::NFA objects. + +=item $fa-E<gt>alphabet + +Returns the list of characters (without duplicates) used among all +transition labels in the automaton. + +=back + +=head2 Conversions To External Formats + +=over + +=item $fa-E<gt>as_graphviz + +Returns a string containing a GraphViz (dot) description of the automaton, +suitable for rendering with your favorite GraphViz layout engine. + +=item $fa-E<gt>as_summary + +Returns a string containing a plaintext description of the automaton, +suitable for debugging purposes. + +=back + +=head2 Miscellaneous + +=over + +=item $fa-E<gt>clone + +Returns an identical copy of $fa. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/NFA.pm b/lib/FLAT/NFA.pm new file mode 100644 index 0000000..344ea76 --- /dev/null +++ b/lib/FLAT/NFA.pm @@ -0,0 +1,509 @@ +package FLAT::NFA; + +use strict; +use base 'FLAT::FA'; + +use FLAT::Transition; + +=head1 NAME + +FLAT::NFA - Nondeterministic finite automata + +=head1 SYNOPSIS + +A FLAT::NFA object is a finite automata whose transitions are labeled +either with characters or the empty string (epsilon). + +=cut + +sub new { +    my $pkg = shift; +    my $self = $pkg->SUPER::new(@_); +    $self->{TRANS_CLASS} = "FLAT::Transition"; +    return $self; +} + +sub singleton { +    my ($class, $char) = @_; +    my $nfa = $class->new; + +    if (not defined $char) { +        $nfa->add_states(1); +        $nfa->set_starting(0); +    } elsif ($char eq "") { +        $nfa->add_states(1); +        $nfa->set_starting(0); +        $nfa->set_accepting(0); +    } else { +        $nfa->add_states(2); +        $nfa->set_starting(0); +        $nfa->set_accepting(1); +        $nfa->set_transition(0, 1, $char); +    } +    return $nfa; +} + +sub as_nfa { $_[0]->clone } + +sub union { +    my @nfas = map { $_->as_nfa } @_;     +    my $result = $nfas[0]->clone;     +    $result->_swallow($_) for @nfas[1 .. $#nfas]; +    $result; +} + +sub concat { +    my @nfas = map { $_->as_nfa } @_; +     +    my $result = $nfas[0]->clone; +    my @newstate = ([ $result->get_states ]); +    my @start = $result->get_starting; + +    for (1 .. $#nfas) { +        push @newstate, [ $result->_swallow( $nfas[$_] ) ]; +    } + +    $result->unset_accepting($result->get_states); +    $result->unset_starting($result->get_states); +    $result->set_starting(@start); +     +    for my $nfa_id (1 .. $#nfas) { +        for my $s1 ($nfas[$nfa_id-1]->get_accepting) { +        for my $s2 ($nfas[$nfa_id]->get_starting) { +            $result->set_transition( +                $newstate[$nfa_id-1][$s1], +                $newstate[$nfa_id][$s2], "" ); +        }} +    } + +    $result->set_accepting( +        @{$newstate[-1]}[ $nfas[-1]->get_accepting ] ); + +    $result; +} + +sub kleene { +    my $result = $_[0]->clone; +     +    my ($newstart, $newfinal) = $result->add_states(2); +     +    $result->set_transition($newstart, $_, "") +        for $result->get_starting; +    $result->unset_starting( $result->get_starting ); +    $result->set_starting($newstart); + +    $result->set_transition($_, $newfinal, "") +        for $result->get_accepting; +    $result->unset_accepting( $result->get_accepting ); +    $result->set_accepting($newfinal); + +    $result->set_transition($newstart, $newfinal, "");     +    $result->set_transition($newfinal, $newstart, ""); +     +    $result; +} + +sub reverse { +    my $self = $_[0]->clone; +    $self->_transpose; +     +    my @start = $self->get_starting; +    my @final = $self->get_accepting; +     +    $self->unset_accepting( $self->get_states ); +    $self->unset_starting( $self->get_states ); +     +    $self->set_accepting( @start ); +    $self->set_starting( @final ); +     +    $self; +} + +########### + +sub is_empty { +    my $self = shift; +     +    my @queue = $self->get_starting; +    my %seen = map { $_ => 1 } @queue; +     +    while (@queue) { +        return 0 if grep { $self->is_accepting($_) } @queue; +        @queue = grep { !$seen{$_}++ } $self->successors(\@queue); +    } +    return 1; +} + +sub is_finite { +    my $self = shift; +     +    my @alphabet = $self->alphabet; +    return 1 if @alphabet == 0; +     +    my @queue = $self->get_starting; +    my %seen = map { $_ => 1 } @queue; +     +    while (@queue) { +        @queue = grep { !$seen{$_}++ } $self->successors(\@queue); +    } +     +    for my $s ( grep { $self->is_accepting($_) } keys %seen ) { +        @queue = $self->epsilon_closure($s); +        %seen  = map { $_ => 1 } @queue; +         +        while (@queue) { +            my @next = $self->epsilon_closure( +                            $self->successors(\@queue, \@alphabet) ); + +            return 0 if grep { $s eq $_ } @next; +            @queue = grep { !$seen{$_}++ } @next; +        } +    } +    return 1; +} + +sub epsilon_closure { +    my ($self, @states) = @_; +    my %seen  = map { $_ => 1 } @states; +    my @queue = @states; +     +    while (@queue) { +        @queue = grep { ! $seen{$_}++ } $self->successors( \@queue, "" ); +    } +     +    keys %seen; +} + + +sub contains { +    my ($self, $string) = @_; + +    my @active = $self->epsilon_closure( $self->get_starting );     +    for my $char (split //, $string) { +        return 0 if ! @active; +        @active = $self->epsilon_closure( $self->successors(\@active, $char) ); +    } +    return !! grep { $self->is_accepting($_) } @active; +} + +sub trace { +    my ($self, $string) = @_; + +    my @trace = ([ $self->epsilon_closure( $self->get_starting ) ]); +     +    for my $char (split //, $string) { +        push @trace, +            [ $self->epsilon_closure( $self->successors($trace[-1], $char) ) ]; +    } +    return @trace; +} +############ + +sub _extend_alphabet { +    my ($self, @alpha) = @_; +     +    my %alpha = map { $_ => 1 } @alpha; +    delete $alpha{$_} for $self->alphabet; + +    return if not keys %alpha; + +    my $trash = $self->add_states(1); +    for my $state ($self->get_states) { +        next if $state eq $trash; +        for my $char (keys %alpha) { +            $self->add_transition($state, $trash, $char); +        } +    } +    $self->add_transition($trash, $trash, $self->alphabet); +} + +######## transformations + +# subset construction +sub as_dfa { +    my $self = shift; +     +    my $result = FLAT::DFA->new; +    my %subset; +     +    my %final = map { $_ => 1 } $self->get_accepting; +    my @start = sort { $a <=> $b } $self->epsilon_closure( $self->get_starting ); + +    my $start = $subset{ _SET_ID(@start) } = $result->add_states(1); +    $result->set_starting($start); +     +    $result->set_accepting( $start ) +        if grep $_, @final{@start}; + +    my @queue = (\@start); +    while (@queue) { +        my @states = @{ shift @queue }; +        my $S      = $subset{ _SET_ID(@states) }; +         +        for my $symb ($self->alphabet) { +            my @to = $self->epsilon_closure( +                            $self->successors(\@states, $symb) ); + +            if ( not exists $subset{_SET_ID(@to)} ) { +                push @queue, \@to; +                my $T = $subset{_SET_ID(@to)} = $result->add_states(1); +                $result->set_accepting($T) +                    if grep $_, @final{@to}; +            } +             +            $result->add_transition($S, $subset{ _SET_ID(@to) }, $symb); +        } +    } + +    $result; +} + +############ Formatted output + +# Format that Dr. Sukhamay KUNDU likes to use in his assignments :) +# This format is just a undirected graph - so transition and state info is lost + +sub as_undirected { +    my $self = shift; +    my @symbols = $self->alphabet(); +    my @states = $self->get_states();  +    my %edges = (); +    foreach (@states)  { +      my $s = $_; +      foreach (@symbols) { +        my $a = $_; +	# foreach state, get all nodes connected to it; ignore symbols and +	# treat transitions simply as directed +	push(@{$edges{$s}},$self->successors($s,$a)); +	foreach ($self->successors($s,$a)) { +  	  push(@{$edges{$_}},$s);	 +	} +      } +    } +    my @lines = (($#states+1)); +    foreach (sort{$a <=> $b;}(keys(%edges))) { #<-- iterate over numerically sorted list of keys +      @{$edges{$_}} = sort {$a <=> $b;} $self->array_unique(@{$edges{$_}}); #<- make items unique and sort numerically +      push(@lines,sprintf("%s(%s):%s",$_,($#{$edges{$_}}+1),join(' ',@{$edges{$_}}))); +    } +    return join("\n",@lines); + } + +# Format that Dr. Sukhamay KUNDU likes to use in his assignments :) +# This format is just a directed graph - so transition and state info is lost + +sub as_digraph { +    my $self = shift; +    my @symbols = $self->alphabet(); +    my @states = $self->get_states();  +    my @lines = (); +    foreach (@states)  { +      my $s = $_; +      my @edges = (); +      foreach (@symbols) { +        my $a = $_; +	# foreach state, get all nodes connected to it; ignore symbols and +	# treat transitions simply as directed +	push(@edges,$self->successors($s,$a)); +      } +      @edges = sort {$a <=> $b;} $self->array_unique(@edges); #<- make items unique and sort numerically +      push(@lines,sprintf("%s(%s): %s",$s,($#edges+1),join(' ',@edges))); +    } +    return sprintf("%s\n%s",($#states+1),join("\n",@lines)); +} + + +# Graph Description Language, aiSee, etc +sub as_gdl { +    my $self = shift; +     +    my @states = map { +        sprintf qq{node: { title:"%s" shape:circle borderstyle: %s}\n}, +            $_, +            ($self->is_accepting($_) ? "double bordercolor: red" : "solid") +    } $self->get_states; +     +    my @trans; +    for my $s1 ($self->get_states) { +    for my $s2 ($self->get_states) { +        my $t = $self->get_transition($s1, $s2); +         +        if (defined $t) { +            push @trans, sprintf qq[edge: { source: "%s" target: "%s" label: "%s" arrowstyle: line }\n], +                $s1, $s2, $t->as_string; +        } +    }} +   +    return sprintf "graph: {\ndisplay_edge_labels: yes\n\n%s\n%s}\n", +        join("", @states), +        join("", @trans); +} + +# Graphviz: dot, etc +## digraph, directed +sub as_graphviz { +    my $self = shift; +     +    my @states = map { +        sprintf qq{%s [label="%s",shape=%s]\n}, +            $_, +            ($self->is_starting($_) ? "start ($_)" : "$_"), +            ($self->is_accepting($_) ? "doublecircle" : "circle") +    } $self->get_states; +     +    my @trans; +    for my $s1 ($self->get_states) { +    for my $s2 ($self->get_states) { +        my $t = $self->get_transition($s1, $s2); +         +        if (defined $t) { +            push @trans, sprintf qq[%s -> %s [label="%s"]\n], +                $s1, $s2, $t->as_string; +        } +    }} +     +    return sprintf "digraph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n", +        join("", @states), +        join("", @trans); +} +## undirected +sub as_undirected_graphviz { +    my $self = shift; +     +    my @states = map { +        sprintf qq{%s [label="%s",shape=%s]\n}, +            $_, +            ("$_"), +            ("circle") +    } $self->get_states; +     +    my @trans; +    for my $s1 ($self->get_states) { +    for my $s2 ($self->get_states) { +        my $t = $self->get_transition($s1, $s2); +         +        if (defined $t) { +            push @trans, sprintf qq[%s -- %s\n], +                $s1, $s2, $t->as_string; +        } +    }} +     +    return sprintf "graph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n", +        join("", @states), +        join("", @trans); +} + +sub _SET_ID { return join "\0", sort { $a <=> $b } @_; } + +sub as_summary { +    my $self = shift; +    my $out = '';  +    $out .= sprintf ("States         : "); +    my @start; +    my @final; +    foreach ($self->get_states()) { +      $out .= sprintf "'$_' "; +      if ($self->is_starting($_)) { +        push(@start,$_); +      } +      if ($self->is_accepting($_)) { +        push(@final,$_); +      } +    } +    $out .= sprintf ("\nStart State    : '%s'\n",join('',@start)); +    $out .= sprintf ("Final State(s) : "); +    foreach (@final) { +      $out .= sprintf "'$_' "; +    } +    $out .= sprintf ("\nAlphabet       : "); +    foreach ($self->alphabet()) { +      $out .= sprintf "'$_' "; +    } +    $out .= sprintf ("\nTransitions    :\n"); +    my @trans; +     for my $s1 ($self->get_states) { +     for my $s2 ($self->get_states) { +         my $t = $self->get_transition($s1, $s2); +         if (defined $t) { +             push @trans, sprintf qq[%s -> %s on "%s"\n], +                 $s1, $s2, $t->as_string; +         } +     }} +    $out .= join('',@trans); +    return $out;         +} + +1; + +__END__ + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT>, FLAT::NFA +objects provide the following NFA-specific methods: + +=over + +=item $nfa-E<gt>epsilon_closure(@states) + +Returns the set of states (without duplicates) which are reachable from +@states via zero or more epsilon-labeled transitions. + +=item $nfa-E<gt>trace($string) + +Returns a list of N+1 arrayrefs, where N is the length of $string. The +I-th arrayref contains the states which are reachable from the starting +state(s) of $nfa after reading I characters of $string. Correctly accounts +for epsilon transitions. + +=item $nfa-E<gt>as_undirected + +Outputs FA in a format that may be easily read into an external program as +a description of an undirected graph. + +=item $nfa-E<gt>as_digraph + +Outputs FA in a format that may be easily read into an external program as +a description of an directed graph. + +=item $nfa-E<gt>as_gdl + +Outputs FA in Graph Description Language (GDL), including directed transitions  +with symbols and state names labeled. + +=item $nfa-E<gt>as_graphviz + +Outputs FA in Graphviz format, including directed transitions with symbols and +and state names labeled.  This output may be directly piped into any of the +Graphviz layout programs, and in turn one may output an image using a single +commandline instruction. C<fash> uses this function to implement its "nfa2gv" +command: + + fash nfa2gv "a*b" | dot -Tpng > nfa.png + +=item $nfa-E<gt>as_undirected_graphviz + +Outputs FA in Graphviz format, with out the directed transitions or labels. +The output is suitable for any of the Graphvize layout programs, as discussed +above. + +=item $nfa-E<gt>as_summary + +Outputs a summary of the FA, including its states, symbols, and transition matrix. +It is useful for manually validating what the FA looks like. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/PFA.pm b/lib/FLAT/PFA.pm new file mode 100644 index 0000000..5557525 --- /dev/null +++ b/lib/FLAT/PFA.pm @@ -0,0 +1,293 @@ +package FLAT::PFA; +use strict; +use base 'FLAT::NFA'; +use Carp; + +use FLAT::Transition; + +use constant LAMBDA => '#lambda'; + +# Note: in a PFA, states are made up of active nodes.  In this implementation, we have +# decided to retain the functionality of the state functions in FA.pm, although the entities +# being manipulated are technically nodes, not states.  States are only explicitly tracked +# once the PFA is serialized into an NFA.  Therefore, the TRANS member of the PFA object is +# the nodal transition function, gamma.  The state transition function, delta, is not used +# in anyway, but is derived out of the PFA->NFA conversion process. + + +# The new way of doing things eliminated from PFA.pm of FLAT::Legacy is the  +# need to explicitly track: start nodes, final nodes, symbols, and lambda & epsilon symbols,   + +sub new { +    my $pkg = shift; +    my $self = $pkg->SUPER::new(@_); # <-- SUPER is FLAT::NFA +    return $self; +} + +# Singleton is no different than the NFA singleton +sub singleton { +    my ($class, $char) = @_; +    my $pfa = $class->new; +    if (not defined $char) { +        $pfa->add_states(1); +        $pfa->set_starting(0); +    } elsif ($char eq "") { +        $pfa->add_states(1); +        $pfa->set_starting(0); +        $pfa->set_accepting(0); +    } else { +        $pfa->add_states(2); +        $pfa->set_starting(0); +        $pfa->set_accepting(1); +        $pfa->set_transition(0, 1, $char); +    } +    return $pfa; +} + +# attack of the clones +sub as_pfa { $_[0]->clone() } + +sub set_starting { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{starting} = 1 for @states; +} + +# Creates a single start state with epsilon transitions from +# the former start states; +# Creates a single final state with epsilon transitions from +# the former accepting states +sub pinch { + my $self = shift; + my $symbol = shift; + my @starting = $self->get_starting; + if (@starting > 1) { +   my $newstart = $self->add_states(1); +   map {$self->add_transition($newstart,$_,$symbol)} @starting; +   $self->unset_starting(@starting); +   $self->set_starting($newstart); + } + # + my @accepting = $self->get_accepting; + if (@accepting > 1) { +   my $newfinal = $self->add_states(1); +   map {$self->add_transition($_,$newfinal,$symbol)} @accepting; +   $self->unset_accepting(@accepting); +   $self->set_accepting($newfinal); + } + return; +} + +# Implement the joining of two PFAs with lambda transitions +# Note: using epsilon pinches for simplicity +sub shuffle { +    my @pfas = map { $_->as_pfa } @_; +    my $result = $pfas[0]->clone; +    $result->_swallow($_) for @pfas[1 .. $#pfas]; +    $result->pinch(LAMBDA); +    $result; +} + +############## + +sub is_tied { +    my ($self, $state) = @_; +    $self->_assert_states($state); +    return $self->{STATES}[$state]{tied}; +} + +sub set_tied { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{tied} = 1 for @states; +} + +sub unset_tied { +    my ($self, @states) = @_; +    $self->_assert_states(@states); +    $self->{STATES}[$_]{tied} = 0 for @states; +} + +sub get_tied { +    my $self = shift; +    return grep { $self->is_tied($_) } $self->get_states; +} + +############## + +# joins two PFAs in a union (or) - no change from NFA +sub union { +    my @pfas = map { $_->as_pfa } @_;     +    my $result = $pfas[0]->clone;     +    $result->_swallow($_) for @pfas[1 .. $#pfas]; +    $result->pinch(''); +    $result; +} + +# joins two PFAs via concatenation  - no change from NFA +sub concat { +    my @pfas = map { $_->as_pfa } @_; +     +    my $result = $pfas[0]->clone; +    my @newstate = ([ $result->get_states ]); +    my @start = $result->get_starting; + +    for (1 .. $#pfas) { +        push @newstate, [ $result->_swallow( $pfas[$_] ) ]; +    } + +    $result->unset_accepting($result->get_states); +    $result->unset_starting($result->get_states); +    $result->set_starting(@start); +     +    for my $pfa_id (1 .. $#pfas) { +        for my $s1 ($pfas[$pfa_id-1]->get_accepting) { +        for my $s2 ($pfas[$pfa_id]->get_starting) { +            $result->set_transition( +                $newstate[$pfa_id-1][$s1], +                $newstate[$pfa_id][$s2], "" ); +        }} +    } + +    $result->set_accepting( +        @{$newstate[-1]}[ $pfas[-1]->get_accepting ] ); + +    $result; +} + +# forms closure around a the given PFA  - no change from NFA +sub kleene { +    my $result = $_[0]->clone; +     +    my ($newstart, $newfinal) = $result->add_states(2); +     +    $result->set_transition($newstart, $_, "") +        for $result->get_starting; +    $result->unset_starting( $result->get_starting ); +    $result->set_starting($newstart); + +    $result->set_transition($_, $newfinal, "") +        for $result->get_accepting; +    $result->unset_accepting( $result->get_accepting ); +    $result->set_accepting($newfinal); + +    $result->set_transition($newstart, $newfinal, "");     +    $result->set_transition($newfinal, $newstart, ""); +     +    $result; +} + +sub as_nfa {  +    my $self = shift; +    my $result = FLAT::NFA->new();     +    # Dstates is initially populated with the start state, which  +    # is exactly the set of all nodes marked as a starting node +    my @Dstates = [sort($self->get_starting())]; # I suppose all start states are considered 'tied' +    my %DONE = ();                               # |- what about all accepting states? I think so... +    # the main while loop that ends when @Dstates becomes exhausted +    my %NEW = (); +    while (@Dstates) { +      my $current = pop(@Dstates); +      my $currentid = join(',',@{$current}); +      $DONE{$currentid}++;    # mark done +      foreach my $symbol ($self->alphabet(),'') {  # Sigma UNION epsilon +        if (LAMBDA eq $symbol) { +          my @NEXT = ();   +	  my @tmp = $self->successors([@{$current}],$symbol); +	  if (@tmp) { +	    my @pred = $self->predecessors([@tmp],LAMBDA); +            if ($self->array_is_subset([@pred],[@{$current}])) { +              push(@NEXT,@tmp,$self->array_complement([@{$current}],[@pred])); +              @NEXT = sort($self->array_unique(@NEXT)); +	      my $nextid = join(',',@NEXT); +	      push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid})); 	             +              # make new states if none exist and track +              if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)}; +              if (!exists($NEW{$nextid}))    {$NEW{$nextid} = $result->add_states(1)   }; +	      $result->add_transition($NEW{$currentid},$NEW{$nextid},'');   +            } +	  } +        } else { +	  foreach my $node (@{$current}) { +	    my @tmp = $self->successors([$node],$symbol);	     +	    foreach my $new (@tmp) { +              my @NEXT = ();	       +              push(@NEXT,$new,$self->array_complement([@{$current}],[$node])); +              @NEXT = sort($self->array_unique(@NEXT)); +	      my $nextid = join(',',@NEXT); +	      push(@Dstates,[@NEXT]) if (!exists($DONE{$nextid})); 	             +              # make new states if none exist and track +              if (!exists($NEW{$currentid})) {$NEW{$currentid} = $result->add_states(1)}; +              if (!exists($NEW{$nextid}))    {$NEW{$nextid} = $result->add_states(1)   }; +	      $result->add_transition($NEW{$currentid},$NEW{$nextid},$symbol);	   +	    } +	  } +	} +      } +    } +    $result->set_starting($NEW{join(",",sort $self->get_starting())}); +    $result->set_accepting($NEW{join(",",sort $self->get_accepting())}); +    return $result; + } + +1; + +__END__ + +=head1 NAME + +FLAT::PFA - Parallel finite automata + +=head1 SYNOPSIS + +A FLAT::PFA object is a finite automata whose transitions are labeled either  +with characters, the empty string (epsilon), or a concurrent line of execution  +(lambda).  It essentially models two FSA in a non-deterministic way such that  +a string is valid it puts the FSA of the shuffled languages both into a final,  +or accepting, state.  A PFA is an NFA, and as such exactly describes a regular  +language. + +A PFA contains nodes and states.  A state is made up of whatever nodes happen  +to be active.  There are two transition functions, nodal transitions and state  +transitions.  When a PFA is converted into a NFA, there is no longer a need for  +nodes or nodal transitions, so they go are eliminated.  PFA model state spaces  +much more compactly than NFA, and an N state PFA may represent 2**N non-deterministic  +states. This also means that a PFA may represent up to 2^(2^N) deterministic states. + +=head1 USAGE + +(not implemented yet) + +In addition to implementing the interface specified in L<FLAT> and L<FLAT::NFA>,  +FLAT::PFA objects provide the following PFA-specific methods: + +=over + +=item $pfa-E<gt>shuffle + +Shuffle construct for building a PFA out of a PRE (i.e., a regular expression with +the shuffle operator) + +=item $pfa-E<gt>as_nfa + +Converts a PFA to an NFA by enumerating all states; similar to the Subset Construction +Algorithm, it does not implement e-closure.  Instead it treats epsilon transitions +normally, and joins any states resulting from a lambda (concurrent) transition +using an epsilon transition. + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Regex.pm b/lib/FLAT/Regex.pm new file mode 100644 index 0000000..2c5c243 --- /dev/null +++ b/lib/FLAT/Regex.pm @@ -0,0 +1,194 @@ +package FLAT::Regex; +use base 'FLAT'; +use strict; +use Carp; + +use FLAT::Regex::Parser; +use FLAT::Regex::Op; + +my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star ]); +#### TODO: error checking in the parse + +sub _parser { $PARSER } + +sub new { +    my ($pkg, $string) = @_; +    my $result = $pkg->_parser->parse($string) +        or croak qq[``$string'' is not a valid regular expression]; + +    $pkg->_from_op( $result ); +} + +sub _from_op { +    my ($proto, $op) = @_; +    $proto = ref $proto || $proto; ## I really do want this +     +    bless [ $op ], $proto; +} + +sub op { +    $_[0][0]; +} + +use overload '""' => 'as_string'; +sub as_string { +    $_[0]->op->as_string(0); +} + +sub as_perl_regex { +    my ($self, %opts) = @_; +     +    my $fmt = $opts{anchored} ? '(?:\A%s\z)' : '(?:%s)'; +    return sprintf $fmt, $self->op->as_perl_regex(0); +} + +sub contains { +    my ($self, $string) = @_; +    $string =~ $self->as_perl_regex(anchored => 1); +} + +sub as_nfa { +    $_[0]->op->as_nfa; +} + +sub as_pfa { +    $_[0]->op->as_pfa; +} + +#### regular language standard interface implementation: +#### TODO: parameter checking? + +sub as_regex { +    $_[0]; +} + +sub union { +    my $self = $_[0]; +    my $op   = FLAT::Regex::op::alt->new( map { $_->as_regex->op } @_ ); +    $self->_from_op($op); +} + +sub intersect { +    my @dfas = map { $_->as_dfa } @_; +    my $self = shift @dfas; +    $self->intersect(@dfas)->as_regex; +} + +sub complement { +    my $self = shift; +    $self->as_dfa->complement->as_regex; +} + +sub concat { +    my $self = $_[0]; +    my $op = FLAT::Regex::op::concat->new( map { $_->as_regex->op } @_ ); +    $self->_from_op($op); +} + +sub kleene { +    my $self = shift; +    my $op   = FLAT::Regex::op::star->new( $self->op ); +    $self->_from_op($op); +} + +sub reverse { +    my $self = shift; +    my $op   = $self->op->reverse; +    $self->_from_op($op); +} + +sub is_empty { +    $_[0]->op->is_empty; +} + +sub is_finite { +    $_[0]->op->is_finite; +} + +1; + +__END__ + +=head1 NAME + +FLAT::Regex - Regular expressions + +=head1 SYNOPSIS + +A FLAT::Regex object is a regular expression. + +=head1 USAGE + +In addition to implementing the interface specified in L<FLAT>, FLAT::Regex +objects provide the following regex-specific methods: + +=over + +=item FLAT::Regex-E<gt>new($string) + +Returns a regex object representing the expression given in $string. C<|> +and C<+> can both be used to denote alternation. C<*> denotes Kleene star, and +parentheses can be used for grouping. No other features or shortcut notation +is currently supported (character classes, {n,m} repetition, etc). + +Whitespaces is ignored. To specify a literal space, use C<[ ]>. This syntax +can also be used to specify atomic "characters" longer than a single +character. For example, the expression: + +  [foo]abc[bar]* + +is treated as a regular expression over the symbols "a", "b", "c", "foo", +and "bar". In particular, this means that when the regular expression is +reversed, "foo" and "bar" remain the same (i.e, they do not become "oof" and +"rab"). + +The empty regular expression (epsilon) is written as C<[]>, and the null +regular expression (sometimes called phi) is specified with the C<#> +character. To specify a literal hash-character, use C<[#]>. Including +literal square bracket characters is currently not supported. + +The expression "" (or any string containing only whitespace) is not a valid +FLAT regex expression. Either C<[]> or C<#> are probably what was intended. + +=item $regex-E<gt>as_string + +Returns the string representation of the regex, in the same format as above. +It is NOT necessarily true that + +  FLAT::Regex->new($string)->as_string + +is identical to $string, especially if $string contains whitespace or +redundant parentheses. + +=item $regex-E<gt>as_perl_regex + +=item $regex-E<gt>as_perl_regex(anchored => $bool); + +Returns an equivalent Perl regular expression. If the "anchored" option +is set to a true value, the regular expression will be anchored with +C<\A> and C<\z>. The default behavior is to omit the anchors. + +The Perl regex will not contain capturing parentheses. "Extended" characters +that are written as "[char]" in FLAT regexes will be written without the +square brackets in the corresponding Perl regex. So the following: + +  FLAT::Regex->new("[foo][bar]*")->as_perl_regex + +will be equal to "(?:foo(?:bar)*)". + +=back + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Regex/Op.pm b/lib/FLAT/Regex/Op.pm new file mode 100644 index 0000000..76e796c --- /dev/null +++ b/lib/FLAT/Regex/Op.pm @@ -0,0 +1,282 @@ +package FLAT::Regex::Op; +use strict; + +sub new { +    my $pkg = shift; +    ## flatten alike operations, i.e, "a+(b+c)" into "a+b+c" +    my @flat = map { UNIVERSAL::isa($_, $pkg) ? $_->members : $_ } @_; + +    bless \@flat, $pkg; +} + +sub members { +    my $self = shift; +    wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + + +################################# +#### regex operators / components + +package FLAT::Regex::Op::atomic; +use base 'FLAT::Regex::Op'; + +sub as_string { +    my $t = $_[0]->members; +     +    return "#" if not defined $t; +    return $t =~ /^\w$/ +        ? $t +        : "[$t]"; +} + +sub as_perl_regex { +    my $r = $_[0]->members; + +    return "(?!)" if not defined $r; +     +    $r = quotemeta $r; +    return $r =~ /^\w$/ ? $r : "(?:$r)"; +} + +sub as_nfa { +    FLAT::NFA->singleton( $_[0]->members ); +} + +sub as_pfa { +    FLAT::PFA->singleton( $_[0]->members ); +} + +sub from_parse { +    my ($pkg, @item) = @_; +    my $i = $item[1]; +     +    return $pkg->new("")    if $i eq "[]"; +    return $pkg->new(undef) if $i eq "#"; +     +    $i =~ s/^\[|\]$//g; +     +    return $pkg->new($i); +} + +sub reverse { +    $_[0]; +} + +sub is_empty { +    not defined $_[0]->members; +} + +sub has_nonempty_string { +    my $self = shift; +    defined $self->members and length $self->members; +} + +sub is_finite { +    1 +} + +############################## +package FLAT::Regex::Op::star; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s '*'" } +sub precedence { 30 } + +sub as_string { +    my ($self, $prec) = @_; +    my $result = $self->members->as_string($self->precedence) . "*"; +    return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { +    my ($self, $prec) = @_; +    my $result = $self->members->as_perl_regex($self->precedence) . "*"; +    return $prec > $self->precedence ? "(?:$result)" : $result;    +} + +sub as_nfa { +    my $self = shift; +    $self->members->as_nfa->kleene; +} + +sub as_pfa { +    my $self = shift; +    $self->members->as_pfa->kleene; +} + +sub from_parse { +    my ($pkg, @item) = @_; +    $pkg->new( $item[1] ); +} + +sub reverse { +    my $self = shift; +    my $op   = $self->members->reverse; +    __PACKAGE__->new($op); +} + +sub is_empty { +    0 +} + +sub has_nonempty_string { +    $_[0]->members->has_nonempty_string; +} + +sub is_finite { +    ! $_[0]->members->has_nonempty_string; +} + + +################################ +package FLAT::Regex::Op::concat; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2..)"; } +sub precedence { 20 } + +sub as_string { +    my ($self, $prec) = @_; +    my $result = join "", +                 map { $_->as_string($self->precedence) } +                 $self->members; +    return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { +    my ($self, $prec) = @_; +    my $result = join "", +                 map { $_->as_perl_regex($self->precedence) } +                 $self->members; +    return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { +    my $self = shift; +    my @parts = map { $_->as_nfa } $self->members; +    $parts[0]->concat( @parts[1..$#parts] ); +} + +sub as_pfa { +    my $self = shift; +    my @parts = map { $_->as_pfa } $self->members; +    $parts[0]->concat( @parts[1..$#parts] ); +} + +sub from_parse { +    my ($pkg, @item) = @_; +    $pkg->new( @{ $item[1] } ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { +    my $self = shift; +    my @ops  = CORE::reverse map { $_->reverse } $self->members; +    __PACKAGE__->new(@ops); +} + +sub is_empty { +    my $self = shift; +    my @members = $self->members; +    for (@members) { +        return 1 if $_->is_empty; +    } +    return 0; +} + +sub has_nonempty_string { +    my $self = shift; +    return 0 if $self->is_empty; +     +    my @members = $self->members; +    for (@members) { +        return 1 if $_->has_nonempty_string; +    } +    return 0; +} + +sub is_finite { +    my $self = shift; +    return 1 if $self->is_empty; +     +    my @members = $self->members; +    for (@members) { +        return 0 if not $_->is_finite; +    } +    return 1; +} + +############################# +package FLAT::Regex::Op::alt; +use base 'FLAT::Regex::Op'; + +sub parse_spec { "%s(2.. /[+|]/)" } +sub precedence { 10 } + +sub as_string { +    my ($self, $prec) = @_; +    my $result = join "+", +                 map { $_->as_string($self->precedence) } +                 $self->members; +    return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { +    my ($self, $prec) = @_; +    my $result = join "|", +                 map { $_->as_perl_regex($self->precedence) } +                 $self->members; +    return $prec > $self->precedence ? "(?:$result)" : $result; +} + +sub as_nfa { +    my $self = shift; +    my @parts = map { $_->as_nfa } $self->members; +    $parts[0]->union( @parts[1..$#parts] ); +} + +sub as_pfa { +    my $self = shift; +    my @parts = map { $_->as_pfa } $self->members; +    $parts[0]->union( @parts[1..$#parts] ); +} + +sub from_parse { +    my ($pkg, @item) = @_; +    $pkg->new( @{ $item[1] } ); +} + +sub reverse { +    my $self = shift; +    my @ops  = map { $_->reverse } $self->members; +    __PACKAGE__->new(@ops); +} + +sub is_empty { +    my $self = shift; +    my @members = $self->members; +    for (@members) { +        return 0 if not $_->is_empty; +    } +    return 1; +} + +sub has_nonempty_string { +    my $self = shift; +    my @members = $self->members; +    for (@members) { +        return 1 if $_->has_nonempty_string; +    } +    return 0; +} + +sub is_finite { +    my $self = shift; +    my @members = $self->members; +    for (@members) { +        return 0 if not $_->is_finite; +    } +    return 1; +} +1; diff --git a/lib/FLAT/Regex/Parser.pm b/lib/FLAT/Regex/Parser.pm new file mode 100644 index 0000000..deb73f1 --- /dev/null +++ b/lib/FLAT/Regex/Parser.pm @@ -0,0 +1,82 @@ +package FLAT::Regex::Parser; +use strict; + +#### Is this one level of abstraction too far? Parser generator generators.. + +#### TODO: try YAPP, since recursive descent is SLOOOW +use Parse::RecDescent; +use FLAT::Regex::Op; + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\$\#] | \[[^\]]*\] }x; + +sub new { +    my $pkg = shift; +    my @ops = sort { $a->{prec} <=> $b->{prec} } +              map {{ +                  pkg   => "FLAT::Regex::Op::$_", +                  prec  => "FLAT::Regex::Op::$_"->precedence, +                  spec  => "FLAT::Regex::Op::$_"->parse_spec, +                  short => $_ +              }} @_; + +    my $lowest = shift @ops; +    my $grammar = qq! +            parse: +                $lowest->{short} /^\\Z/ { \$item[1] } +    !; +     +    my $prev = $lowest; +    for (@ops) { +        my $spec = sprintf $prev->{spec}, $_->{short}; +         +        $grammar .= qq! +            $prev->{short}: +                $spec       { $prev->{pkg}\->from_parse(\@item) } +              | $_->{short} { \$item[1] } +        !; +         +        $prev = $_; +    }             + +    my $spec = sprintf $prev->{spec}, "atomic"; +    $grammar .= qq! +            $prev->{short}: +                $spec  { $prev->{pkg}\->from_parse(\@item) } +              | atomic { \$item[1] } + +            atomic: +                "(" $lowest->{short} ")" { \$item[2] } +              | /\$FLAT::Regex::Parser::CHAR/ +                          { FLAT::Regex::Op::atomic->from_parse(\@item) } +    !; + +    Parse::RecDescent->new($grammar); +} + +1; + + +__END__ + +original parser: + +use vars '$CHAR'; +$CHAR = qr{ [A-Za-z0-9_\!\@\#\$\%\&] | \[[^\]]*\] }x; + +my $PARSER = Parse::RecDescent->new(<<'__EOG__') or die; +  parse: +      alt /^\Z/            { $item[1] } +  alt: +      concat(2.. /[+|]/)   { FLAT::Regex::Op::alt->from_parse(@item) } +    | concat               { $item[1] } +  concat: +      star(2..)            { FLAT::Regex::Op::concat->from_parse(@item) } +    | star                 { $item[1] } +  star : +      atomic '*'           { FLAT::Regex::Op::star->from_parse(@item) } +    | atomic               { $item[1] } +  atomic: +      "(" alt ")"          { $item[2] } +    | /$FLAT::Regex::CHAR/ { FLAT::Regex::Op::atomic->from_parse(@item) } +__EOG__ diff --git a/lib/FLAT/Regex/Transform.pm b/lib/FLAT/Regex/Transform.pm new file mode 100644 index 0000000..cd0cf56 --- /dev/null +++ b/lib/FLAT/Regex/Transform.pm @@ -0,0 +1,18 @@ +package FLAT::Regex::Transform; + +# Extends FLAT::Regex::WithExtraOps with PRegex transformations  +# (i.e., reductions based on: w*v & a*b + +use base 'FLAT::Regex::WithExtraOps'; + +sub new { +    my $pkg = shift; +    my $self = $pkg->SUPER::new(@_); +    return $self; +} + +# Ideally, the transformation should be implemented as an iterator.  This +# approach will be finite for shuffles with NO closed strings, but will carry on +# indefinitely for the shuffle of strings where at least one of the strings is closed + +1; diff --git a/lib/FLAT/Regex/Util.pm b/lib/FLAT/Regex/Util.pm new file mode 100644 index 0000000..516ad9f --- /dev/null +++ b/lib/FLAT/Regex/Util.pm @@ -0,0 +1,33 @@ +package FLAT::Regex::Util; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +sub get_symbol { +  my @symbols = qw/0 1/; +  return $symbols[rand(2)]; +} + +sub get_op { +  my @ops = ('*','+','&','','','','','','',''); +  return $ops[rand(10)]; +} + +sub get_random { +  my $length = shift; +  my $string = '';  +  if (1 < $length) { +    $string = get_symbol().get_op().get_random(--$length); +  } else { +    $string = get_symbol(); +  } +  return $string; +} + +sub random_pre { +  my $length = ( $_[0] ? $_[0] : 32 ); +  return FLAT::Regex::WithExtraOps->new(get_random($length)); +} + +1; diff --git a/lib/FLAT/Regex/WithExtraOps.pm b/lib/FLAT/Regex/WithExtraOps.pm new file mode 100644 index 0000000..b366d7c --- /dev/null +++ b/lib/FLAT/Regex/WithExtraOps.pm @@ -0,0 +1,109 @@ +package FLAT::Regex::WithExtraOps; +use base 'FLAT::Regex'; + +use strict; +use Carp; + +my $PARSER = FLAT::Regex::Parser->new(qw[ alt concat star negate shuffle ]); +sub _parser { $PARSER } + +sub members { +    my $self = shift; +    wantarray ? @$self[0 .. $#$self] : $self->[0]; +} + +#### Precedence +# 30 ::star +# 20 ::concat +# 15 ::negate   <---<< WithExtraOps +# 12 ::shuffle  <---<< WithExtraOps +# 10 ::alt +# 0  ::atomic + +############################### +package FLAT::Regex::Op::negate; +use base "FLAT::Regex::Op"; +use Carp; + +sub parse_spec { "'~' %s"; } +sub precedence { 15 } # between concat and alternation + +sub as_string { +    my ($self, $prec) = @_; +    my $result = "~" . $self->members->as_string($self->precedence); +    return $prec > $self->precedence ? "($result)" : $result; +} + +sub from_parse { +    my ($pkg, @item) = @_; +    $pkg->new( $item[2] ); +} + +## note: "reverse" conflicts with perl builtin +sub reverse { +    my $self = shift; +    my $op   = $self->members->reverse; +    __PACKAGE__->new($op); +} + +sub is_empty { +    croak "Not implemented for negated regexes"; +} + +sub has_nonempty_string { +    croak "Not implemented for negated regexes"; +} + +sub is_finite { +    croak "Not implemented for negated regexes"; +} + +############################### +package FLAT::Regex::Op::shuffle; +use base 'FLAT::Regex::Op'; +use Carp; + +sub parse_spec { "%s(2.. /[&]/)" } +sub precedence { 12 } + +sub as_string { +    my ($self, $prec) = @_; +    my $result = join "&", +                 map { $_->as_string($self->precedence) } +                 $self->members; +    return $prec > $self->precedence ? "($result)" : $result; +} + +sub as_perl_regex { +    my $self = shift; +    croak "Not implemented for shuffled regexes"; +} + +sub from_parse { +    my ($pkg, @item) = @_; +    $pkg->new( @{ $item[1] } ); +} + +sub as_pfa { +    my $self = shift; +    my @parts = map { $_->as_pfa } $self->members; +    $parts[0]->shuffle( @parts[1..$#parts] ); +} + +# Implement? +sub reverse { +    my $self = shift; +    croak "Not implemented for shuffled regexes"; +} + +sub is_empty { +    croak "Not implemented for shuffled regexes"; +} + +sub has_nonempty_string { +    croak "Not implemented for shuffled regexes"; +} + +sub is_finite { +    croak "Not implemented for shuffled regexes"; +} diff --git a/lib/FLAT/Symbol.pm b/lib/FLAT/Symbol.pm new file mode 100644 index 0000000..aaadccc --- /dev/null +++ b/lib/FLAT/Symbol.pm @@ -0,0 +1,98 @@ +# +# Conceptual Experiment - not currently implemented anywhere... +# + +package FLAT::Symbol + +use strict; +use Carp; + +sub new { +  my ($pkg, $string, $type) = @_; +  bless { +    STRING => $string, +    TYPE => $type,  +  }, $pkg; +} + +sub as_string { +  return $_[0]->{STRING}; +} + +sub get_type } +  return $_[0]->{TYPE}; +} + +sub set_type { +  $_[0]->{TYPE} = $_[1]; +} + +1; + +################## + +package FLAT::Symbol::Regular; +use base 'FLAT::Symbol'; + +sub new { +  my $pkg = shift; +  my $self = $pkg->SUPER::new($_[0],'Regular'); +  return $self; +} + +sub get_type { +  return 'Regular'; +} + +sub set_type { +  croak("Sorry, can't change type for this symbol"); +} + +1;  + +################## + +package FLAT::Symbol::Special; +use base 'FLAT::Symbol'; + +sub new { +  my $pkg = shift; +  my $self = $pkg->SUPER::new($_[0],'Special'); +  return $self; +} + +sub get_type { +  return 'Special'; +} + +sub set_type { +  croak("Sorry, can't change type for this symbol");} + +1; + +__END__ + +=head1 NAME + +FLAT::Symbol - Base class for transition symbol. + +=head1 SYNOPSIS + +A super class that is intended to provide a simple mechanism for storing a symbol that might be  +in conflict with another symbol in string form.  TYPE is used to distinguish.  Currenly this neither  +this, nor its current sub classes, FLAT::Symbol::Regular and FLAT::Symbol::Special, are used. + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/Transition.pm b/lib/FLAT/Transition.pm new file mode 100644 index 0000000..fc385f3 --- /dev/null +++ b/lib/FLAT/Transition.pm @@ -0,0 +1,66 @@ +package FLAT::Transition; +use strict; +use Carp; + +sub new { +    my ($pkg, @things) = @_; +    bless { map { $_ => 1 } @things }, $pkg; +} + +sub does { +    my ($self, @things) = @_; +    return 1 if @things == 0; +    return !! grep $self->{$_}, @things; +} + +sub add { +    my ($self, @things) = @_; +    @$self{@things} = (1) x @things; +} + +sub delete { +    my ($self, @things) = @_; +    delete $self->{$_} for @things; +} + +sub alphabet { +    my $self = shift; +    sort { $a cmp $b } keys %$self; +} + +sub as_string { +    my $self = shift; +    join ",", map { length $_ ? $_ : "epsilon" } $self->alphabet; +} + +1; + +__END__ + +=head1 NAME + +FLAT::Transition - a transition base class.  + +=head1 SYNOPSIS + +Default implementation of the Transition class, used to manage transitions +from one state to others.  This class is meant for internal use. + +=head1 USAGE + +used internally; + +=head1 AUTHORS & ACKNOWLEDGEMENTS + +FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and  +Brett Estrade E<lt>estradb at gmail dot comE<gt>. + +The initial version (FLAT::Legacy) by Brett Estrade was work towards an  +MS thesis at the University of Southern Mississippi. + +Please visit the Wiki at http://www.0x743.com/flat + +=head1 LICENSE + +This module is free software; you can redistribute it and/or modify it under +the same terms as Perl itself. diff --git a/lib/FLAT/XFA.pm b/lib/FLAT/XFA.pm new file mode 100644 index 0000000..67e073c --- /dev/null +++ b/lib/FLAT/XFA.pm @@ -0,0 +1,3 @@ +package FLAT::XFA; + +1; | 
