summaryrefslogtreecommitdiff
path: root/lib/FLAT/NFA.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/FLAT/NFA.pm')
-rw-r--r--lib/FLAT/NFA.pm509
1 files changed, 509 insertions, 0 deletions
diff --git a/lib/FLAT/NFA.pm b/lib/FLAT/NFA.pm
new file mode 100644
index 0000000..344ea76
--- /dev/null
+++ b/lib/FLAT/NFA.pm
@@ -0,0 +1,509 @@
+package FLAT::NFA;
+
+use strict;
+use base 'FLAT::FA';
+
+use FLAT::Transition;
+
+=head1 NAME
+
+FLAT::NFA - Nondeterministic finite automata
+
+=head1 SYNOPSIS
+
+A FLAT::NFA object is a finite automata whose transitions are labeled
+either with characters or the empty string (epsilon).
+
+=cut
+
+sub new {
+ my $pkg = shift;
+ my $self = $pkg->SUPER::new(@_);
+ $self->{TRANS_CLASS} = "FLAT::Transition";
+ return $self;
+}
+
+sub singleton {
+ my ($class, $char) = @_;
+ my $nfa = $class->new;
+
+ if (not defined $char) {
+ $nfa->add_states(1);
+ $nfa->set_starting(0);
+ } elsif ($char eq "") {
+ $nfa->add_states(1);
+ $nfa->set_starting(0);
+ $nfa->set_accepting(0);
+ } else {
+ $nfa->add_states(2);
+ $nfa->set_starting(0);
+ $nfa->set_accepting(1);
+ $nfa->set_transition(0, 1, $char);
+ }
+ return $nfa;
+}
+
+sub as_nfa { $_[0]->clone }
+
+sub union {
+ my @nfas = map { $_->as_nfa } @_;
+ my $result = $nfas[0]->clone;
+ $result->_swallow($_) for @nfas[1 .. $#nfas];
+ $result;
+}
+
+sub concat {
+ my @nfas = map { $_->as_nfa } @_;
+
+ my $result = $nfas[0]->clone;
+ my @newstate = ([ $result->get_states ]);
+ my @start = $result->get_starting;
+
+ for (1 .. $#nfas) {
+ push @newstate, [ $result->_swallow( $nfas[$_] ) ];
+ }
+
+ $result->unset_accepting($result->get_states);
+ $result->unset_starting($result->get_states);
+ $result->set_starting(@start);
+
+ for my $nfa_id (1 .. $#nfas) {
+ for my $s1 ($nfas[$nfa_id-1]->get_accepting) {
+ for my $s2 ($nfas[$nfa_id]->get_starting) {
+ $result->set_transition(
+ $newstate[$nfa_id-1][$s1],
+ $newstate[$nfa_id][$s2], "" );
+ }}
+ }
+
+ $result->set_accepting(
+ @{$newstate[-1]}[ $nfas[-1]->get_accepting ] );
+
+ $result;
+}
+
+sub kleene {
+ my $result = $_[0]->clone;
+
+ my ($newstart, $newfinal) = $result->add_states(2);
+
+ $result->set_transition($newstart, $_, "")
+ for $result->get_starting;
+ $result->unset_starting( $result->get_starting );
+ $result->set_starting($newstart);
+
+ $result->set_transition($_, $newfinal, "")
+ for $result->get_accepting;
+ $result->unset_accepting( $result->get_accepting );
+ $result->set_accepting($newfinal);
+
+ $result->set_transition($newstart, $newfinal, "");
+ $result->set_transition($newfinal, $newstart, "");
+
+ $result;
+}
+
+sub reverse {
+ my $self = $_[0]->clone;
+ $self->_transpose;
+
+ my @start = $self->get_starting;
+ my @final = $self->get_accepting;
+
+ $self->unset_accepting( $self->get_states );
+ $self->unset_starting( $self->get_states );
+
+ $self->set_accepting( @start );
+ $self->set_starting( @final );
+
+ $self;
+}
+
+###########
+
+sub is_empty {
+ my $self = shift;
+
+ my @queue = $self->get_starting;
+ my %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ return 0 if grep { $self->is_accepting($_) } @queue;
+ @queue = grep { !$seen{$_}++ } $self->successors(\@queue);
+ }
+ return 1;
+}
+
+sub is_finite {
+ my $self = shift;
+
+ my @alphabet = $self->alphabet;
+ return 1 if @alphabet == 0;
+
+ my @queue = $self->get_starting;
+ my %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ @queue = grep { !$seen{$_}++ } $self->successors(\@queue);
+ }
+
+ for my $s ( grep { $self->is_accepting($_) } keys %seen ) {
+ @queue = $self->epsilon_closure($s);
+ %seen = map { $_ => 1 } @queue;
+
+ while (@queue) {
+ my @next = $self->epsilon_closure(
+ $self->successors(\@queue, \@alphabet) );
+
+ return 0 if grep { $s eq $_ } @next;
+ @queue = grep { !$seen{$_}++ } @next;
+ }
+ }
+ return 1;
+}
+
+sub epsilon_closure {
+ my ($self, @states) = @_;
+ my %seen = map { $_ => 1 } @states;
+ my @queue = @states;
+
+ while (@queue) {
+ @queue = grep { ! $seen{$_}++ } $self->successors( \@queue, "" );
+ }
+
+ keys %seen;
+}
+
+
+sub contains {
+ my ($self, $string) = @_;
+
+ my @active = $self->epsilon_closure( $self->get_starting );
+ for my $char (split //, $string) {
+ return 0 if ! @active;
+ @active = $self->epsilon_closure( $self->successors(\@active, $char) );
+ }
+ return !! grep { $self->is_accepting($_) } @active;
+}
+
+sub trace {
+ my ($self, $string) = @_;
+
+ my @trace = ([ $self->epsilon_closure( $self->get_starting ) ]);
+
+ for my $char (split //, $string) {
+ push @trace,
+ [ $self->epsilon_closure( $self->successors($trace[-1], $char) ) ];
+ }
+ return @trace;
+}
+############
+
+sub _extend_alphabet {
+ my ($self, @alpha) = @_;
+
+ my %alpha = map { $_ => 1 } @alpha;
+ delete $alpha{$_} for $self->alphabet;
+
+ return if not keys %alpha;
+
+ my $trash = $self->add_states(1);
+ for my $state ($self->get_states) {
+ next if $state eq $trash;
+ for my $char (keys %alpha) {
+ $self->add_transition($state, $trash, $char);
+ }
+ }
+ $self->add_transition($trash, $trash, $self->alphabet);
+}
+
+######## transformations
+
+# subset construction
+sub as_dfa {
+ my $self = shift;
+
+ my $result = FLAT::DFA->new;
+ my %subset;
+
+ my %final = map { $_ => 1 } $self->get_accepting;
+ my @start = sort { $a <=> $b } $self->epsilon_closure( $self->get_starting );
+
+ my $start = $subset{ _SET_ID(@start) } = $result->add_states(1);
+ $result->set_starting($start);
+
+ $result->set_accepting( $start )
+ if grep $_, @final{@start};
+
+ my @queue = (\@start);
+ while (@queue) {
+ my @states = @{ shift @queue };
+ my $S = $subset{ _SET_ID(@states) };
+
+ for my $symb ($self->alphabet) {
+ my @to = $self->epsilon_closure(
+ $self->successors(\@states, $symb) );
+
+ if ( not exists $subset{_SET_ID(@to)} ) {
+ push @queue, \@to;
+ my $T = $subset{_SET_ID(@to)} = $result->add_states(1);
+ $result->set_accepting($T)
+ if grep $_, @final{@to};
+ }
+
+ $result->add_transition($S, $subset{ _SET_ID(@to) }, $symb);
+ }
+ }
+
+ $result;
+}
+
+############ Formatted output
+
+# Format that Dr. Sukhamay KUNDU likes to use in his assignments :)
+# This format is just a undirected graph - so transition and state info is lost
+
+sub as_undirected {
+ my $self = shift;
+ my @symbols = $self->alphabet();
+ my @states = $self->get_states();
+ my %edges = ();
+ foreach (@states) {
+ my $s = $_;
+ foreach (@symbols) {
+ my $a = $_;
+ # foreach state, get all nodes connected to it; ignore symbols and
+ # treat transitions simply as directed
+ push(@{$edges{$s}},$self->successors($s,$a));
+ foreach ($self->successors($s,$a)) {
+ push(@{$edges{$_}},$s);
+ }
+ }
+ }
+ my @lines = (($#states+1));
+ foreach (sort{$a <=> $b;}(keys(%edges))) { #<-- iterate over numerically sorted list of keys
+ @{$edges{$_}} = sort {$a <=> $b;} $self->array_unique(@{$edges{$_}}); #<- make items unique and sort numerically
+ push(@lines,sprintf("%s(%s):%s",$_,($#{$edges{$_}}+1),join(' ',@{$edges{$_}})));
+ }
+ return join("\n",@lines);
+ }
+
+# Format that Dr. Sukhamay KUNDU likes to use in his assignments :)
+# This format is just a directed graph - so transition and state info is lost
+
+sub as_digraph {
+ my $self = shift;
+ my @symbols = $self->alphabet();
+ my @states = $self->get_states();
+ my @lines = ();
+ foreach (@states) {
+ my $s = $_;
+ my @edges = ();
+ foreach (@symbols) {
+ my $a = $_;
+ # foreach state, get all nodes connected to it; ignore symbols and
+ # treat transitions simply as directed
+ push(@edges,$self->successors($s,$a));
+ }
+ @edges = sort {$a <=> $b;} $self->array_unique(@edges); #<- make items unique and sort numerically
+ push(@lines,sprintf("%s(%s): %s",$s,($#edges+1),join(' ',@edges)));
+ }
+ return sprintf("%s\n%s",($#states+1),join("\n",@lines));
+}
+
+
+# Graph Description Language, aiSee, etc
+sub as_gdl {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{node: { title:"%s" shape:circle borderstyle: %s}\n},
+ $_,
+ ($self->is_accepting($_) ? "double bordercolor: red" : "solid")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[edge: { source: "%s" target: "%s" label: "%s" arrowstyle: line }\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "graph: {\ndisplay_edge_labels: yes\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+
+# Graphviz: dot, etc
+## digraph, directed
+sub as_graphviz {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{%s [label="%s",shape=%s]\n},
+ $_,
+ ($self->is_starting($_) ? "start ($_)" : "$_"),
+ ($self->is_accepting($_) ? "doublecircle" : "circle")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[%s -> %s [label="%s"]\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "digraph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+## undirected
+sub as_undirected_graphviz {
+ my $self = shift;
+
+ my @states = map {
+ sprintf qq{%s [label="%s",shape=%s]\n},
+ $_,
+ ("$_"),
+ ("circle")
+ } $self->get_states;
+
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+
+ if (defined $t) {
+ push @trans, sprintf qq[%s -- %s\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+
+ return sprintf "graph G {\ngraph [rankdir=LR]\n\n%s\n%s}\n",
+ join("", @states),
+ join("", @trans);
+}
+
+sub _SET_ID { return join "\0", sort { $a <=> $b } @_; }
+
+sub as_summary {
+ my $self = shift;
+ my $out = '';
+ $out .= sprintf ("States : ");
+ my @start;
+ my @final;
+ foreach ($self->get_states()) {
+ $out .= sprintf "'$_' ";
+ if ($self->is_starting($_)) {
+ push(@start,$_);
+ }
+ if ($self->is_accepting($_)) {
+ push(@final,$_);
+ }
+ }
+ $out .= sprintf ("\nStart State : '%s'\n",join('',@start));
+ $out .= sprintf ("Final State(s) : ");
+ foreach (@final) {
+ $out .= sprintf "'$_' ";
+ }
+ $out .= sprintf ("\nAlphabet : ");
+ foreach ($self->alphabet()) {
+ $out .= sprintf "'$_' ";
+ }
+ $out .= sprintf ("\nTransitions :\n");
+ my @trans;
+ for my $s1 ($self->get_states) {
+ for my $s2 ($self->get_states) {
+ my $t = $self->get_transition($s1, $s2);
+ if (defined $t) {
+ push @trans, sprintf qq[%s -> %s on "%s"\n],
+ $s1, $s2, $t->as_string;
+ }
+ }}
+ $out .= join('',@trans);
+ return $out;
+}
+
+1;
+
+__END__
+
+=head1 USAGE
+
+In addition to implementing the interface specified in L<FLAT>, FLAT::NFA
+objects provide the following NFA-specific methods:
+
+=over
+
+=item $nfa-E<gt>epsilon_closure(@states)
+
+Returns the set of states (without duplicates) which are reachable from
+@states via zero or more epsilon-labeled transitions.
+
+=item $nfa-E<gt>trace($string)
+
+Returns a list of N+1 arrayrefs, where N is the length of $string. The
+I-th arrayref contains the states which are reachable from the starting
+state(s) of $nfa after reading I characters of $string. Correctly accounts
+for epsilon transitions.
+
+=item $nfa-E<gt>as_undirected
+
+Outputs FA in a format that may be easily read into an external program as
+a description of an undirected graph.
+
+=item $nfa-E<gt>as_digraph
+
+Outputs FA in a format that may be easily read into an external program as
+a description of an directed graph.
+
+=item $nfa-E<gt>as_gdl
+
+Outputs FA in Graph Description Language (GDL), including directed transitions
+with symbols and state names labeled.
+
+=item $nfa-E<gt>as_graphviz
+
+Outputs FA in Graphviz format, including directed transitions with symbols and
+and state names labeled. This output may be directly piped into any of the
+Graphviz layout programs, and in turn one may output an image using a single
+commandline instruction. C<fash> uses this function to implement its "nfa2gv"
+command:
+
+ fash nfa2gv "a*b" | dot -Tpng > nfa.png
+
+=item $nfa-E<gt>as_undirected_graphviz
+
+Outputs FA in Graphviz format, with out the directed transitions or labels.
+The output is suitable for any of the Graphvize layout programs, as discussed
+above.
+
+=item $nfa-E<gt>as_summary
+
+Outputs a summary of the FA, including its states, symbols, and transition matrix.
+It is useful for manually validating what the FA looks like.
+
+=back
+
+=head1 AUTHORS & ACKNOWLEDGEMENTS
+
+FLAT is written by Mike Rosulek E<lt>mike at mikero dot comE<gt> and
+Brett Estrade E<lt>estradb at gmail dot comE<gt>.
+
+The initial version (FLAT::Legacy) by Brett Estrade was work towards an
+MS thesis at the University of Southern Mississippi.
+
+Please visit the Wiki at http://www.0x743.com/flat
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.