import SLY

author: Daniel Friesel <daniel.friesel@uos.de> 2019-12-12 17:33:13 +0100
committer: Daniel Friesel <daniel.friesel@uos.de> 2019-12-12 17:33:13 +0100
commit: ff6fc70456354b23663bee037c5e737a2b437ca8 (patch)
tree: 57397e592e664bf5f20b72032be887557bc2df08
parent: 575a33b819d29ee99fa7d4264a943043d1d64032 (diff)
5 files changed, 2531 insertions, 0 deletions
diff --git a/lib/sly/__init__.py b/lib/sly/__init__.py
new file mode 100644
index 0000000..3c1e708
--- /dev/null
+++ b/lib/sly/__init__.py
@@ -0,0 +1,6 @@
+
+from .lex import *
+from .yacc import *
+
+__version__ = "0.4"
+__all__ = [ *lex.__all__, *yacc.__all__ ]
diff --git a/lib/sly/ast.py b/lib/sly/ast.py
new file mode 100644
index 0000000..7b79ac5
--- /dev/null
+++ b/lib/sly/ast.py
@@ -0,0 +1,25 @@
+# sly/ast.py
+import sys
+
+class AST(object):
+    
+    @classmethod
+    def __init_subclass__(cls, **kwargs):
+        mod = sys.modules[cls.__module__]
+        if not hasattr(cls, '__annotations__'):
+            return
+
+        hints = list(cls.__annotations__.items())
+
+        def __init__(self, *args, **kwargs):
+            if len(hints) != len(args):
+                raise TypeError(f'Expected {len(hints)} arguments')
+            for arg, (name, val) in zip(args, hints):
+                if isinstance(val, str):
+                    val = getattr(mod, val)
+                if not isinstance(arg, val):
+                    raise TypeError(f'{name} argument must be {val}')
+                setattr(self, name, arg)
+
+        cls.__init__ = __init__
+
diff --git a/lib/sly/docparse.py b/lib/sly/docparse.py
new file mode 100644
index 0000000..d5a83ce
--- /dev/null
+++ b/lib/sly/docparse.py
@@ -0,0 +1,60 @@
+# docparse.py
+#
+# Support doc-string parsing classes
+
+__all__ = [ 'DocParseMeta' ]
+
+class DocParseMeta(type):
+    '''
+    Metaclass that processes the class docstring through a parser and
+    incorporates the result into the resulting class definition. This
+    allows Python classes to be defined with alternative syntax. 
+    To use this class, you first need to define a lexer and parser:
+
+        from sly import Lexer, Parser
+        class MyLexer(Lexer):
+           ...
+
+        class MyParser(Parser):
+           ...
+
+    You then need to define a metaclass that inherits from DocParseMeta.
+    This class must specify the associated lexer and parser classes.
+    For example:
+
+        class MyDocParseMeta(DocParseMeta):
+            lexer = MyLexer
+            parser = MyParser
+
+    This metaclass is then used as a base for processing user-defined
+    classes:
+
+        class Base(metaclass=MyDocParseMeta):
+            pass
+
+        class Spam(Base):
+            """
+            doc string is parsed
+            ...
+            """
+
+    It is expected that the MyParser() class would return a dictionary. 
+    This dictionary is used to create the final class Spam in this example.
+    '''
+
+    @staticmethod
+    def __new__(meta, clsname, bases, clsdict):
+        if '__doc__' in clsdict:
+            lexer = meta.lexer()
+            parser = meta.parser()
+            lexer.cls_name = parser.cls_name = clsname
+            lexer.cls_qualname = parser.cls_qualname = clsdict['__qualname__']
+            lexer.cls_module = parser.cls_module = clsdict['__module__']
+            parsedict = parser.parse(lexer.tokenize(clsdict['__doc__']))
+            assert isinstance(parsedict, dict), 'Parser must return a dictionary'
+            clsdict.update(parsedict)
+        return super().__new__(meta, clsname, bases, clsdict)
+
+    @classmethod
+    def __init_subclass__(cls):
+        assert hasattr(cls, 'parser') and hasattr(cls, 'lexer')
diff --git a/lib/sly/lex.py b/lib/sly/lex.py
new file mode 100644
index 0000000..246dd9e
--- /dev/null
+++ b/lib/sly/lex.py
@@ -0,0 +1,439 @@
+# -----------------------------------------------------------------------------
+# sly: lex.py
+#
+# Copyright (C) 2016 - 2018
+# David M. Beazley (Dabeaz LLC)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of the David Beazley or Dabeaz LLC may be used to
+#   endorse or promote products derived from this software without
+#  specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+
+__all__ = ['Lexer', 'LexerStateChange']
+
+import re
+import copy
+
+class LexError(Exception):
+    '''
+    Exception raised if an invalid character is encountered and no default
+    error handler function is defined.  The .text attribute of the exception
+    contains all remaining untokenized text. The .error_index is the index
+    location of the error.
+    '''
+    def __init__(self, message, text, error_index):
+        self.args = (message,)
+        self.text = text
+        self.error_index = error_index
+
+class PatternError(Exception):
+    '''
+    Exception raised if there's some kind of problem with the specified
+    regex patterns in the lexer.
+    '''
+    pass
+
+class LexerBuildError(Exception):
+    '''
+    Exception raised if there's some sort of problem building the lexer.
+    '''
+    pass
+
+class LexerStateChange(Exception):
+    '''
+    Exception raised to force a lexing state change
+    '''
+    def __init__(self, newstate, tok=None):
+        self.newstate = newstate
+        self.tok = tok
+
+class Token(object):
+    '''
+    Representation of a single token.
+    '''
+    __slots__ = ('type', 'value', 'lineno', 'index')
+    def __repr__(self):
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
+
+class TokenStr(str):
+    @staticmethod
+    def __new__(cls, value, key=None, remap=None):
+        self = super().__new__(cls, value)
+        self.key = key
+        self.remap = remap
+        return self
+
+    # Implementation of TOKEN[value] = NEWTOKEN
+    def __setitem__(self, key, value):
+        if self.remap is not None:
+            self.remap[self.key, key] = value
+
+    # Implementation of del TOKEN[value]
+    def __delitem__(self, key):
+        if self.remap is not None:
+            self.remap[self.key, key] = self.key
+
+class _Before:
+    def __init__(self, tok, pattern):
+        self.tok = tok
+        self.pattern = pattern
+
+class LexerMetaDict(dict):
+    '''
+    Special dictionary that prohibits duplicate definitions in lexer specifications.
+    '''
+    def __init__(self):
+        self.before = { }
+        self.delete = [ ]
+        self.remap = { }
+
+    def __setitem__(self, key, value):
+        if isinstance(value, str):
+            value = TokenStr(value, key, self.remap)
+            
+        if isinstance(value, _Before):
+            self.before[key] = value.tok
+            value = TokenStr(value.pattern, key, self.remap)
+            
+        if key in self and not isinstance(value, property):
+            prior = self[key]
+            if isinstance(prior, str):
+                if callable(value):
+                    value.pattern = prior
+                else:
+                    raise AttributeError(f'Name {key} redefined')
+
+        super().__setitem__(key, value)
+
+    def __delitem__(self, key):
+        self.delete.append(key)
+        if key not in self and key.isupper():
+            pass
+        else:
+            return super().__delitem__(key)
+
+    def __getitem__(self, key):
+        if key not in self and key.split('ignore_')[-1].isupper() and key[:1] != '_':
+            return TokenStr(key, key, self.remap)
+        else:
+            return super().__getitem__(key)
+
+class LexerMeta(type):
+    '''
+    Metaclass for collecting lexing rules
+    '''
+    @classmethod
+    def __prepare__(meta, name, bases):
+        d = LexerMetaDict()
+
+        def _(pattern, *extra):
+            patterns = [pattern, *extra]
+            def decorate(func):
+                pattern = '|'.join(f'({pat})' for pat in patterns )
+                if hasattr(func, 'pattern'):
+                    func.pattern = pattern + '|' + func.pattern
+                else:
+                    func.pattern = pattern
+                return func
+            return decorate
+
+        d['_'] = _
+        d['before'] = _Before
+        return d
+
+    def __new__(meta, clsname, bases, attributes):
+        del attributes['_']
+        del attributes['before']
+
+        # Create attributes for use in the actual class body
+        cls_attributes = { str(key): str(val) if isinstance(val, TokenStr) else val
+                           for key, val in attributes.items() }
+        cls = super().__new__(meta, clsname, bases, cls_attributes)
+
+        # Attach various metadata to the class
+        cls._attributes = dict(attributes)
+        cls._remap = attributes.remap
+        cls._before = attributes.before
+        cls._delete = attributes.delete
+        cls._build()
+        return cls
+
+class Lexer(metaclass=LexerMeta):
+    # These attributes may be defined in subclasses
+    tokens = set()
+    literals = set()
+    ignore = ''
+    reflags = 0
+    regex_module = re
+
+    _token_names = set()
+    _token_funcs = {}
+    _ignored_tokens = set()
+    _remapping = {}
+    _delete = {}
+    _remap = {}
+
+    # Internal attributes
+    __state_stack = None
+    __set_state = None
+
+    @classmethod
+    def _collect_rules(cls):
+        # Collect all of the rules from class definitions that look like token
+        # information.   There are a few things that govern this:
+        #
+        # 1.  Any definition of the form NAME = str is a token if NAME is
+        #     is defined in the tokens set.
+        #
+        # 2.  Any definition of the form ignore_NAME = str is a rule for an ignored
+        #     token.
+        #
+        # 3.  Any function defined with a 'pattern' attribute is treated as a rule.
+        #     Such functions can be created with the @_ decorator or by defining
+        #     function with the same name as a previously defined string.
+        #
+        # This function is responsible for keeping rules in order. 
+
+        # Collect all previous rules from base classes
+        rules = []
+
+        for base in cls.__bases__:
+            if isinstance(base, LexerMeta):
+                rules.extend(base._rules)
+                
+        # Dictionary of previous rules
+        existing = dict(rules)
+
+        for key, value in cls._attributes.items():
+            if (key in cls._token_names) or key.startswith('ignore_') or hasattr(value, 'pattern'):
+                if callable(value) and not hasattr(value, 'pattern'):
+                    raise LexerBuildError(f"function {value} doesn't have a regex pattern")
+                
+                if key in existing:
+                    # The definition matches something that already existed in the base class.
+                    # We replace it, but keep the original ordering
+                    n = rules.index((key, existing[key]))
+                    rules[n] = (key, value)
+                    existing[key] = value
+
+                elif isinstance(value, TokenStr) and key in cls._before:
+                    before = cls._before[key]
+                    if before in existing:
+                        # Position the token before another specified token
+                        n = rules.index((before, existing[before]))
+                        rules.insert(n, (key, value))
+                    else:
+                        # Put at the end of the rule list
+                        rules.append((key, value))
+                    existing[key] = value
+                else:
+                    rules.append((key, value))
+                    existing[key] = value
+
+            elif isinstance(value, str) and not key.startswith('_') and key not in {'ignore', 'literals'}:
+                raise LexerBuildError(f'{key} does not match a name in tokens')
+
+        # Apply deletion rules
+        rules = [ (key, value) for key, value in rules if key not in cls._delete ]
+        cls._rules = rules
+
+    @classmethod
+    def _build(cls):
+        '''
+        Build the lexer object from the collected tokens and regular expressions.
+        Validate the rules to make sure they look sane.
+        '''
+        if 'tokens' not in vars(cls):
+            raise LexerBuildError(f'{cls.__qualname__} class does not define a tokens attribute')
+
+        # Pull definitions created for any parent classes
+        cls._token_names = cls._token_names | set(cls.tokens)
+        cls._ignored_tokens = set(cls._ignored_tokens)
+        cls._token_funcs = dict(cls._token_funcs)
+        cls._remapping = dict(cls._remapping)
+
+        for (key, val), newtok in cls._remap.items():
+            if key not in cls._remapping:
+                cls._remapping[key] = {}
+            cls._remapping[key][val] = newtok
+
+        remapped_toks = set()
+        for d in cls._remapping.values():
+            remapped_toks.update(d.values())
+            
+        undefined = remapped_toks - set(cls._token_names)
+        if undefined:
+            missing = ', '.join(undefined)
+            raise LexerBuildError(f'{missing} not included in token(s)')
+
+        cls._collect_rules()
+
+        parts = []
+        for tokname, value in cls._rules:
+            if tokname.startswith('ignore_'):
+                tokname = tokname[7:]
+                cls._ignored_tokens.add(tokname)
+
+            if isinstance(value, str):
+                pattern = value
+
+            elif callable(value):
+                cls._token_funcs[tokname] = value
+                pattern = getattr(value, 'pattern')
+
+            # Form the regular expression component
+            part = f'(?P<{tokname}>{pattern})'
+
+            # Make sure the individual regex compiles properly
+            try:
+                cpat = cls.regex_module.compile(part, cls.reflags)
+            except Exception as e:
+                raise PatternError(f'Invalid regex for token {tokname}') from e
+
+            # Verify that the pattern doesn't match the empty string
+            if cpat.match(''):
+                raise PatternError(f'Regex for token {tokname} matches empty input')
+
+            parts.append(part)
+
+        if not parts:
+            return
+
+        # Form the master regular expression
+        #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+        # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
+        cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags)
+
+        # Verify that that ignore and literals specifiers match the input type
+        if not isinstance(cls.ignore, str):
+            raise LexerBuildError('ignore specifier must be a string')
+
+        if not all(isinstance(lit, str) for lit in cls.literals):
+            raise LexerBuildError('literals must be specified as strings')
+
+    def begin(self, cls):
+        '''
+        Begin a new lexer state
+        '''
+        assert isinstance(cls, LexerMeta), "state must be a subclass of Lexer"
+        if self.__set_state:
+            self.__set_state(cls)
+        self.__class__ = cls
+
+    def push_state(self, cls):
+        '''
+        Push a new lexer state onto the stack
+        '''
+        if self.__state_stack is None:
+            self.__state_stack = []
+        self.__state_stack.append(type(self))
+        self.begin(cls)
+
+    def pop_state(self):
+        '''
+        Pop a lexer state from the stack
+        '''
+        self.begin(self.__state_stack.pop())
+
+    def tokenize(self, text, lineno=1, index=0):
+        _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None
+
+        def _set_state(cls):
+            nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
+            _ignored_tokens = cls._ignored_tokens
+            _master_re = cls._master_re
+            _ignore = cls.ignore
+            _token_funcs = cls._token_funcs
+            _literals = cls.literals
+            _remapping = cls._remapping
+
+        self.__set_state = _set_state
+        _set_state(type(self))
+        self.text = text
+
+        try:
+            while True:
+                try:
+                    if text[index] in _ignore:
+                        index += 1
+                        continue
+                except IndexError:
+                    return
+
+                tok = Token()
+                tok.lineno = lineno
+                tok.index = index
+                m = _master_re.match(text, index)
+                if m:
+                    index = m.end()
+                    tok.value = m.group()
+                    tok.type = m.lastgroup
+
+                    if tok.type in _remapping:
+                        tok.type = _remapping[tok.type].get(tok.value, tok.type)
+
+                    if tok.type in _token_funcs:
+                        self.index = index
+                        self.lineno = lineno
+                        tok = _token_funcs[tok.type](self, tok)
+                        index = self.index
+                        lineno = self.lineno
+                        if not tok:
+                            continue
+
+                    if tok.type in _ignored_tokens:
+                        continue
+
+                    yield tok
+
+                else:
+                    # No match, see if the character is in literals
+                    if text[index] in _literals:
+                        tok.value = text[index]
+                        tok.type = tok.value
+                        index += 1
+                        yield tok
+                    else:
+                        # A lexing error
+                        self.index = index
+                        self.lineno = lineno
+                        tok.type = 'ERROR'
+                        tok.value = text[index:]
+                        tok = self.error(tok)
+                        if tok is not None:
+                            yield tok
+
+                        index = self.index
+                        lineno = self.lineno
+
+        # Set the final state of the lexer before exiting (even if exception)
+        finally:
+            self.text = text
+            self.index = index
+            self.lineno = lineno
+
+    # Default implementations of the error handler. May be changed in subclasses
+    def error(self, t):
+        raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
diff --git a/lib/sly/yacc.py b/lib/sly/yacc.py
new file mode 100644
index 0000000..c30f13c
--- /dev/null
+++ b/lib/sly/yacc.py
@@ -0,0 +1,2001 @@
+# -----------------------------------------------------------------------------
+# sly: yacc.py
+#
+# Copyright (C) 2016-2018
+# David M. Beazley (Dabeaz LLC)
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of the David Beazley or Dabeaz LLC may be used to
+#   endorse or promote products derived from this software without
+#  specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+
+import sys
+import inspect
+from collections import OrderedDict, defaultdict
+
+__all__        = [ 'Parser' ]
+
+class YaccError(Exception):
+    '''
+    Exception raised for yacc-related build errors.
+    '''
+    pass
+
+#-----------------------------------------------------------------------------
+#                     === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish).  
+# Move these parameters to the Yacc class itself.
+#-----------------------------------------------------------------------------
+
+ERROR_COUNT = 3                # Number of symbols that must be shifted to leave recovery mode
+MAXINT = sys.maxsize
+
+# This object is a stand-in for a logging object created by the
+# logging module.   SLY will use this by default to create things
+# such as the parser.out file.  If a user wants more detailed
+# information, they can create their own logging object and pass
+# it into SLY.
+
+class SlyLogger(object):
+    def __init__(self, f):
+        self.f = f
+
+    def debug(self, msg, *args, **kwargs):
+        self.f.write((msg % args) + '\n')
+
+    info = debug
+
+    def warning(self, msg, *args, **kwargs):
+        self.f.write('WARNING: ' + (msg % args) + '\n')
+
+    def error(self, msg, *args, **kwargs):
+        self.f.write('ERROR: ' + (msg % args) + '\n')
+
+    critical = debug
+
+
+# ----------------------------------------------------------------------
+# This class is used to hold non-terminal grammar symbols during parsing.
+# It normally has the following attributes set:
+#        .type       = Grammar symbol type
+#        .value      = Symbol value
+#        .lineno     = Starting line number
+#        .index      = Starting lex position
+# ----------------------------------------------------------------------
+
+class YaccSymbol:
+    def __str__(self):
+        return self.type
+
+    def __repr__(self):
+        return str(self)
+
+# ----------------------------------------------------------------------
+# This class is a wrapper around the objects actually passed to each
+# grammar rule.   Index lookup and assignment actually assign the
+# .value attribute of the underlying YaccSymbol object.
+# The lineno() method returns the line number of a given
+# item (or 0 if not defined).   
+# ----------------------------------------------------------------------
+
+class YaccProduction:
+    __slots__ = ('_slice', '_namemap', '_stack')
+    def __init__(self, s, stack=None):
+        self._slice = s
+        self._namemap = { }
+        self._stack = stack
+
+    def __getitem__(self, n):
+        if n >= 0:
+            return self._slice[n].value
+        else:
+            return self._stack[n].value
+
+    def __setitem__(self, n, v):
+        if n >= 0:
+            self._slice[n].value = v
+        else:
+            self._stack[n].value = v
+
+    def __len__(self):
+        return len(self._slice)
+
+    @property
+    def lineno(self):
+        for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
+            lineno = getattr(tok, 'lineno', None)
+            if lineno:
+                return lineno
+        raise AttributeError('No line number found')
+
+    @property
+    def index(self):
+        for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
+            index = getattr(tok, 'index', None)
+            if index is not None:
+                return index
+        raise AttributeError('No index attribute found')
+
+    def __getattr__(self, name):
+        if name in self._namemap:
+            return self._slice[self._namemap[name]].value
+        else:
+            nameset = '{' + ', '.join(self._namemap) + '}'
+            raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
+
+    def __setattr__(self, name, value):
+        if name[:1] == '_':
+            super().__setattr__(name, value)
+        else:
+            raise AttributeError(f"Can't reassign the value of attribute {name!r}")
+
+# -----------------------------------------------------------------------------
+#                          === Grammar Representation ===
+#
+# The following functions, classes, and variables are used to represent and
+# manipulate the rules that make up a grammar.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# class Production:
+#
+# This class stores the raw information about a single production or grammar rule.
+# A grammar rule refers to a specification such as this:
+#
+#       expr : expr PLUS term
+#
+# Here are the basic attributes defined on all productions
+#
+#       name     - Name of the production.  For example 'expr'
+#       prod     - A list of symbols on the right side ['expr','PLUS','term']
+#       prec     - Production precedence level
+#       number   - Production number.
+#       func     - Function that executes on reduce
+#       file     - File where production function is defined
+#       lineno   - Line number where production function is defined
+#
+# The following attributes are defined or optional.
+#
+#       len       - Length of the production (number of symbols on right hand side)
+#       usyms     - Set of unique symbols found in the production
+# -----------------------------------------------------------------------------
+
+class Production(object):
+    reduced = 0
+    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
+        self.name     = name
+        self.prod     = tuple(prod)
+        self.number   = number
+        self.func     = func
+        self.file     = file
+        self.line     = line
+        self.prec     = precedence
+        
+        # Internal settings used during table construction
+        self.len  = len(self.prod)   # Length of the production
+
+        # Create a list of unique production symbols used in the production
+        self.usyms = []
+        symmap = defaultdict(list)
+        for n, s in enumerate(self.prod):
+            symmap[s].append(n)
+            if s not in self.usyms:
+                self.usyms.append(s)
+
+        # Create a dict mapping symbol names to indices
+        m = {}
+        for key, indices in symmap.items():
+            if len(indices) == 1:
+                m[key] = indices[0]
+            else:
+                for n, index in enumerate(indices):
+                    m[key+str(n)] = index
+
+        self.namemap = m
+                
+        # List of all LR items for the production
+        self.lr_items = []
+        self.lr_next = None
+
+    def __str__(self):
+        if self.prod:
+            s = '%s -> %s' % (self.name, ' '.join(self.prod))
+        else:
+            s = f'{self.name} -> <empty>'
+
+        if self.prec[1]:
+            s += '  [precedence=%s, level=%d]' % self.prec
+
+        return s
+
+    def __repr__(self):
+        return f'Production({self})'
+
+    def __len__(self):
+        return len(self.prod)
+
+    def __nonzero__(self):
+        raise RuntimeError('Used')
+        return 1
+
+    def __getitem__(self, index):
+        return self.prod[index]
+
+    # Return the nth lr_item from the production (or None if at the end)
+    def lr_item(self, n):
+        if n > len(self.prod):
+            return None
+        p = LRItem(self, n)
+        # Precompute the list of productions immediately following.
+        try:
+            p.lr_after = Prodnames[p.prod[n+1]]
+        except (IndexError, KeyError):
+            p.lr_after = []
+        try:
+            p.lr_before = p.prod[n-1]
+        except IndexError:
+            p.lr_before = None
+        return p
+
+# -----------------------------------------------------------------------------
+# class LRItem
+#
+# This class represents a specific stage of parsing a production rule.  For
+# example:
+#
+#       expr : expr . PLUS term
+#
+# In the above, the "." represents the current location of the parse.  Here
+# basic attributes:
+#
+#       name       - Name of the production.  For example 'expr'
+#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
+#       number     - Production number.
+#
+#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
+#                    then lr_next refers to 'expr -> expr PLUS . term'
+#       lr_index   - LR item index (location of the ".") in the prod list.
+#       lookaheads - LALR lookahead symbols for this item
+#       len        - Length of the production (number of symbols on right hand side)
+#       lr_after    - List of all productions that immediately follow
+#       lr_before   - Grammar symbol immediately before
+# -----------------------------------------------------------------------------
+
+class LRItem(object):
+    def __init__(self, p, n):
+        self.name       = p.name
+        self.prod       = list(p.prod)
+        self.number     = p.number
+        self.lr_index   = n
+        self.lookaheads = {}
+        self.prod.insert(n, '.')
+        self.prod       = tuple(self.prod)
+        self.len        = len(self.prod)
+        self.usyms      = p.usyms
+
+    def __str__(self):
+        if self.prod:
+            s = '%s -> %s' % (self.name, ' '.join(self.prod))
+        else:
+            s = f'{self.name} -> <empty>'
+        return s
+
+    def __repr__(self):
+        return f'LRItem({self})'
+
+# -----------------------------------------------------------------------------
+# rightmost_terminal()
+#
+# Return the rightmost terminal from a list of symbols.  Used in add_production()
+# -----------------------------------------------------------------------------
+def rightmost_terminal(symbols, terminals):
+    i = len(symbols) - 1
+    while i >= 0:
+        if symbols[i] in terminals:
+            return symbols[i]
+        i -= 1
+    return None
+
+# -----------------------------------------------------------------------------
+#                           === GRAMMAR CLASS ===
+#
+# The following class represents the contents of the specified grammar along
+# with various computed properties such as first sets, follow sets, LR items, etc.
+# This data is used for critical parts of the table generation process later.
+# -----------------------------------------------------------------------------
+
+class GrammarError(YaccError):
+    pass
+
+class Grammar(object):
+    def __init__(self, terminals):
+        self.Productions  = [None]  # A list of all of the productions.  The first
+                                    # entry is always reserved for the purpose of
+                                    # building an augmented grammar
+
+        self.Prodnames    = {}      # A dictionary mapping the names of nonterminals to a list of all
+                                    # productions of that nonterminal.
+
+        self.Prodmap      = {}      # A dictionary that is only used to detect duplicate
+                                    # productions.
+
+        self.Terminals    = {}      # A dictionary mapping the names of terminal symbols to a
+                                    # list of the rules where they are used.
+
+        for term in terminals:
+            self.Terminals[term] = []
+
+        self.Terminals['error'] = []
+
+        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
+                                    # of rule numbers where they are used.
+
+        self.First        = {}      # A dictionary of precomputed FIRST(x) symbols
+
+        self.Follow       = {}      # A dictionary of precomputed FOLLOW(x) symbols
+
+        self.Precedence   = {}      # Precedence rules for each terminal. Contains tuples of the
+                                    # form ('right',level) or ('nonassoc', level) or ('left',level)
+
+        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
+                                    # This is only used to provide error checking and to generate
+                                    # a warning about unused precedence rules.
+
+        self.Start = None           # Starting symbol for the grammar
+
+
+    def __len__(self):
+        return len(self.Productions)
+
+    def __getitem__(self, index):
+        return self.Productions[index]
+
+    # -----------------------------------------------------------------------------
+    # set_precedence()
+    #
+    # Sets the precedence for a given terminal. assoc is the associativity such as
+    # 'left','right', or 'nonassoc'.  level is a numeric level.
+    #
+    # -----------------------------------------------------------------------------
+
+    def set_precedence(self, term, assoc, level):
+        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
+        if term in self.Precedence:
+            raise GrammarError(f'Precedence already specified for terminal {term!r}')
+        if assoc not in ['left', 'right', 'nonassoc']:
+            raise GrammarError(f"Associativity of {term!r} must be one of 'left','right', or 'nonassoc'")
+        self.Precedence[term] = (assoc, level)
+
+    # -----------------------------------------------------------------------------
+    # add_production()
+    #
+    # Given an action function, this function assembles a production rule and
+    # computes its precedence level.
+    #
+    # The production rule is supplied as a list of symbols.   For example,
+    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
+    # symbols ['expr','PLUS','term'].
+    #
+    # Precedence is determined by the precedence of the right-most non-terminal
+    # or the precedence of a terminal specified by %prec.
+    #
+    # A variety of error checks are performed to make sure production symbols
+    # are valid and that %prec is used correctly.
+    # -----------------------------------------------------------------------------
+
+    def add_production(self, prodname, syms, func=None, file='', line=0):
+
+        if prodname in self.Terminals:
+            raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token')
+        if prodname == 'error':
+            raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word')
+
+        # Look for literal tokens
+        for n, s in enumerate(syms):
+            if s[0] in "'\"" and s[0] == s[-1]:
+                c = s[1:-1]
+                if (len(c) != 1):
+                    raise GrammarError(f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
+                if c not in self.Terminals:
+                    self.Terminals[c] = []
+                syms[n] = c
+                continue
+
+        # Determine the precedence level
+        if '%prec' in syms:
+            if syms[-1] == '%prec':
+                raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec')
+            if syms[-2] != '%prec':
+                raise GrammarError(f'{file}:{line}: Syntax error. %prec can only appear at the end of a grammar rule')
+            precname = syms[-1]
+            prodprec = self.Precedence.get(precname)
+            if not prodprec:
+                raise GrammarError(f'{file}:{line}: Nothing known about the precedence of {precname!r}')
+            else:
+                self.UsedPrecedence.add(precname)
+            del syms[-2:]     # Drop %prec from the rule
+        else:
+            # If no %prec, precedence is determined by the rightmost terminal symbol
+            precname = rightmost_terminal(syms, self.Terminals)
+            prodprec = self.Precedence.get(precname, ('right', 0))
+
+        # See if the rule is already in the rulemap
+        map = '%s -> %s' % (prodname, syms)
+        if map in self.Prodmap:
+            m = self.Prodmap[map]
+            raise GrammarError(f'{file}:{line}: Duplicate rule {m}. ' +
+                               f'Previous definition at {m.file}:{m.line}')
+
+        # From this point on, everything is valid.  Create a new Production instance
+        pnumber  = len(self.Productions)
+        if prodname not in self.Nonterminals:
+            self.Nonterminals[prodname] = []
+
+        # Add the production number to Terminals and Nonterminals
+        for t in syms:
+            if t in self.Terminals:
+                self.Terminals[t].append(pnumber)
+            else:
+                if t not in self.Nonterminals:
+                    self.Nonterminals[t] = []
+                self.Nonterminals[t].append(pnumber)
+
+        # Create a production and add it to the list of productions
+        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
+        self.Productions.append(p)
+        self.Prodmap[map] = p
+
+        # Add to the global productions list
+        try:
+            self.Prodnames[prodname].append(p)
+        except KeyError:
+            self.Prodnames[prodname] = [p]
+
+    # -----------------------------------------------------------------------------
+    # set_start()
+    #
+    # Sets the starting symbol and creates the augmented grammar.  Production
+    # rule 0 is S' -> start where start is the start symbol.
+    # -----------------------------------------------------------------------------
+
+    def set_start(self, start=None):
+        if callable(start):
+            start = start.__name__
+
+        if not start:
+            start = self.Productions[1].name
+
+        if start not in self.Nonterminals:
+            raise GrammarError(f'start symbol {start} undefined')
+        self.Productions[0] = Production(0, "S'", [start])
+        self.Nonterminals[start].append(0)
+        self.Start = start
+
+    # -----------------------------------------------------------------------------
+    # find_unreachable()
+    #
+    # Find all of the nonterminal symbols that can't be reached from the starting
+    # symbol.  Returns a list of nonterminals that can't be reached.
+    # -----------------------------------------------------------------------------
+
+    def find_unreachable(self):
+
+        # Mark all symbols that are reachable from a symbol s
+        def mark_reachable_from(s):
+            if s in reachable:
+                return
+            reachable.add(s)
+            for p in self.Prodnames.get(s, []):
+                for r in p.prod:
+                    mark_reachable_from(r)
+
+        reachable = set()
+        mark_reachable_from(self.Productions[0].prod[0])
+        return [s for s in self.Nonterminals if s not in reachable]
+
+    # -----------------------------------------------------------------------------
+    # infinite_cycles()
+    #
+    # This function looks at the various parsing rules and tries to detect
+    # infinite recursion cycles (grammar rules where there is no possible way
+    # to derive a string of only terminals).
+    # -----------------------------------------------------------------------------
+
+    def infinite_cycles(self):
+        terminates = {}
+
+        # Terminals:
+        for t in self.Terminals:
+            terminates[t] = True
+
+        terminates['$end'] = True
+
+        # Nonterminals:
+
+        # Initialize to false:
+        for n in self.Nonterminals:
+            terminates[n] = False
+
+        # Then propagate termination until no change:
+        while True:
+            some_change = False
+            for (n, pl) in self.Prodnames.items():
+                # Nonterminal n terminates iff any of its productions terminates.
+                for p in pl:
+                    # Production p terminates iff all of its rhs symbols terminate.
+                    for s in p.prod:
+                        if not terminates[s]:
+                            # The symbol s does not terminate,
+                            # so production p does not terminate.
+                            p_terminates = False
+                            break
+                    else:
+                        # didn't break from the loop,
+                        # so every symbol s terminates
+                        # so production p terminates.
+                        p_terminates = True
+
+                    if p_terminates:
+                        # symbol n terminates!
+                        if not terminates[n]:
+                            terminates[n] = True
+                            some_change = True
+                        # Don't need to consider any more productions for this n.
+                        break
+
+            if not some_change:
+                break
+
+        infinite = []
+        for (s, term) in terminates.items():
+            if not term:
+                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+                    # s is used-but-not-defined, and we've already warned of that,
+                    # so it would be overkill to say that it's also non-terminating.
+                    pass
+                else:
+                    infinite.append(s)
+
+        return infinite
+
+    # -----------------------------------------------------------------------------
+    # undefined_symbols()
+    #
+    # Find all symbols that were used the grammar, but not defined as tokens or
+    # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
+    # and prod is the production where the symbol was used.
+    # -----------------------------------------------------------------------------
+    def undefined_symbols(self):
+        result = []
+        for p in self.Productions:
+            if not p:
+                continue
+
+            for s in p.prod:
+                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+                    result.append((s, p))
+        return result
+
+    # -----------------------------------------------------------------------------
+    # unused_terminals()
+    #
+    # Find all terminals that were defined, but not used by the grammar.  Returns
+    # a list of all symbols.
+    # -----------------------------------------------------------------------------
+    def unused_terminals(self):
+        unused_tok = []
+        for s, v in self.Terminals.items():
+            if s != 'error' and not v:
+                unused_tok.append(s)
+
+        return unused_tok
+
+    # ------------------------------------------------------------------------------
+    # unused_rules()
+    #
+    # Find all grammar rules that were defined,  but not used (maybe not reachable)
+    # Returns a list of productions.
+    # ------------------------------------------------------------------------------
+
+    def unused_rules(self):
+        unused_prod = []
+        for s, v in self.Nonterminals.items():
+            if not v:
+                p = self.Prodnames[s][0]
+                unused_prod.append(p)
+        return unused_prod
+
+    # -----------------------------------------------------------------------------
+    # unused_precedence()
+    #
+    # Returns a list of tuples (term,precedence) corresponding to precedence
+    # rules that were never used by the grammar.  term is the name of the terminal
+    # on which precedence was applied and precedence is a string such as 'left' or
+    # 'right' corresponding to the type of precedence.
+    # -----------------------------------------------------------------------------
+
+    def unused_precedence(self):
+        unused = []
+        for termname in self.Precedence:
+            if not (termname in self.Terminals or termname in self.UsedPrecedence):
+                unused.append((termname, self.Precedence[termname][0]))
+
+        return unused
+
+    # -------------------------------------------------------------------------
+    # _first()
+    #
+    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
+    #
+    # During execution of compute_first1, the result may be incomplete.
+    # Afterward (e.g., when called from compute_follow()), it will be complete.
+    # -------------------------------------------------------------------------
+    def _first(self, beta):
+
+        # We are computing First(x1,x2,x3,...,xn)
+        result = []
+        for x in beta:
+            x_produces_empty = False
+
+            # Add all the non-<empty> symbols of First[x] to the result.
+            for f in self.First[x]:
+                if f == '<empty>':
+                    x_produces_empty = True
+                else:
+                    if f not in result:
+                        result.append(f)
+
+            if x_produces_empty:
+                # We have to consider the next x in beta,
+                # i.e. stay in the loop.
+                pass
+            else:
+                # We don't have to consider any further symbols in beta.
+                break
+        else:
+            # There was no 'break' from the loop,
+            # so x_produces_empty was true for all x in beta,
+            # so beta produces empty as well.
+            result.append('<empty>')
+
+        return result
+
+    # -------------------------------------------------------------------------
+    # compute_first()
+    #
+    # Compute the value of FIRST1(X) for all symbols
+    # -------------------------------------------------------------------------
+    def compute_first(self):
+        if self.First:
+            return self.First
+
+        # Terminals:
+        for t in self.Terminals:
+            self.First[t] = [t]
+
+        self.First['$end'] = ['$end']
+
+        # Nonterminals:
+
+        # Initialize to the empty set:
+        for n in self.Nonterminals:
+            self.First[n] = []
+
+        # Then propagate symbols until no change:
+        while True:
+            some_change = False
+            for n in self.Nonterminals:
+                for p in self.Prodnames[n]:
+                    for f in self._first(p.prod):
+                        if f not in self.First[n]:
+                            self.First[n].append(f)
+                            some_change = True
+            if not some_change:
+                break
+
+        return self.First
+
+    # ---------------------------------------------------------------------
+    # compute_follow()
+    #
+    # Computes all of the follow sets for every non-terminal symbol.  The
+    # follow set is the set of all symbols that might follow a given
+    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
+    # ---------------------------------------------------------------------
+    def compute_follow(self, start=None):
+        # If already computed, return the result
+        if self.Follow:
+            return self.Follow
+
+        # If first sets not computed yet, do that first.
+        if not self.First:
+            self.compute_first()
+
+        # Add '$end' to the follow list of the start symbol
+        for k in self.Nonterminals:
+            self.Follow[k] = []
+
+        if not start:
+            start = self.Productions[1].name
+
+        self.Follow[start] = ['$end']
+
+        while True:
+            didadd = False
+            for p in self.Productions[1:]:
+                # Here is the production set
+                for i, B in enumerate(p.prod):
+                    if B in self.Nonterminals:
+                        # Okay. We got a non-terminal in a production
+                        fst = self._first(p.prod[i+1:])
+                        hasempty = False
+                        for f in fst:
+                            if f != '<empty>' and f not in self.Follow[B]:
+                                self.Follow[B].append(f)
+                                didadd = True
+                            if f == '<empty>':
+                                hasempty = True
+                        if hasempty or i == (len(p.prod)-1):
+                            # Add elements of follow(a) to follow(b)
+                            for f in self.Follow[p.name]:
+                                if f not in self.Follow[B]:
+                                    self.Follow[B].append(f)
+                                    didadd = True
+            if not didadd:
+                break
+        return self.Follow
+
+
+    # -----------------------------------------------------------------------------
+    # build_lritems()
+    #
+    # This function walks the list of productions and builds a complete set of the
+    # LR items.  The LR items are stored in two ways:  First, they are uniquely
+    # numbered and placed in the list _lritems.  Second, a linked list of LR items
+    # is built for each production.  For example:
+    #
+    #   E -> E PLUS E
+    #
+    # Creates the list
+    #
+    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
+    # -----------------------------------------------------------------------------
+
+    def build_lritems(self):
+        for p in self.Productions:
+            lastlri = p
+            i = 0
+            lr_items = []
+            while True:
+                if i > len(p):
+                    lri = None
+                else:
+                    lri = LRItem(p, i)
+                    # Precompute the list of productions immediately following
+                    try:
+                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
+                    except (IndexError, KeyError):
+                        lri.lr_after = []
+                    try:
+                        lri.lr_before = lri.prod[i-1]
+                    except IndexError:
+                        lri.lr_before = None
+
+                lastlri.lr_next = lri
+                if not lri:
+                    break
+                lr_items.append(lri)
+                lastlri = lri
+                i += 1
+            p.lr_items = lr_items
+
+
+    # ----------------------------------------------------------------------
+    # Debugging output.  Printing the grammar will produce a detailed
+    # description along with some diagnostics.
+    # ----------------------------------------------------------------------
+    def __str__(self):
+        out = []
+        out.append('Grammar:\n')
+        for n, p in enumerate(self.Productions):
+            out.append(f'Rule {n:<5d} {p}')
+        
+        unused_terminals = self.unused_terminals()
+        if unused_terminals:
+            out.append('\nUnused terminals:\n')
+            for term in unused_terminals:
+                out.append(f'    {term}')
+
+        out.append('\nTerminals, with rules where they appear:\n')
+        for term in sorted(self.Terminals):
+            out.append('%-20s : %s' % (term, ' '.join(str(s) for s in self.Terminals[term])))
+
+        out.append('\nNonterminals, with rules where they appear:\n')
+        for nonterm in sorted(self.Nonterminals):
+            out.append('%-20s : %s' % (nonterm, ' '.join(str(s) for s in self.Nonterminals[nonterm])))
+
+        out.append('')
+        return '\n'.join(out)
+
+# -----------------------------------------------------------------------------
+#                           === LR Generator ===
+#
+# The following classes and functions are used to generate LR parsing tables on
+# a grammar.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# digraph()
+# traverse()
+#
+# The following two functions are used to compute set valued functions
+# of the form:
+#
+#     F(x) = F'(x) U U{F(y) | x R y}
+#
+# This is used to compute the values of Read() sets as well as FOLLOW sets
+# in LALR(1) generation.
+#
+# Inputs:  X    - An input set
+#          R    - A relation
+#          FP   - Set-valued function
+# ------------------------------------------------------------------------------
+
+def digraph(X, R, FP):
+    N = {}
+    for x in X:
+        N[x] = 0
+    stack = []
+    F = {}
+    for x in X:
+        if N[x] == 0:
+            traverse(x, N, stack, F, X, R, FP)
+    return F
+
+def traverse(x, N, stack, F, X, R, FP):
+    stack.append(x)
+    d = len(stack)
+    N[x] = d
+    F[x] = FP(x)             # F(X) <- F'(x)
+
+    rel = R(x)               # Get y's related to x
+    for y in rel:
+        if N[y] == 0:
+            traverse(y, N, stack, F, X, R, FP)
+        N[x] = min(N[x], N[y])
+        for a in F.get(y, []):
+            if a not in F[x]:
+                F[x].append(a)
+    if N[x] == d:
+        N[stack[-1]] = MAXINT
+        F[stack[-1]] = F[x]
+        element = stack.pop()
+        while element != x:
+            N[stack[-1]] = MAXINT
+            F[stack[-1]] = F[x]
+            element = stack.pop()
+
+class LALRError(YaccError):
+    pass
+
+# -----------------------------------------------------------------------------
+#                             == LRGeneratedTable ==
+#
+# This class implements the LR table generation algorithm.  There are no
+# public methods except for write()
+# -----------------------------------------------------------------------------
+
+class LRTable(object):
+    def __init__(self, grammar):
+        self.grammar = grammar
+
+        # Internal attributes
+        self.lr_action     = {}        # Action table
+        self.lr_goto       = {}        # Goto table
+        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
+        self.lr_goto_cache = {}        # Cache of computed gotos
+        self.lr0_cidhash   = {}        # Cache of closures
+        self._add_count    = 0         # Internal counter used to detect cycles
+
+        # Diagonistic information filled in by the table generator
+        self.state_descriptions = OrderedDict()
+        self.sr_conflict   = 0
+        self.rr_conflict   = 0
+        self.conflicts     = []        # List of conflicts
+
+        self.sr_conflicts  = []
+        self.rr_conflicts  = []
+
+        # Build the tables
+        self.grammar.build_lritems()
+        self.grammar.compute_first()
+        self.grammar.compute_follow()
+        self.lr_parse_table()
+
+        # Build default states
+        # This identifies parser states where there is only one possible reduction action.
+        # For such states, the parser can make a choose to make a rule reduction without consuming
+        # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
+        # certain kinds of advanced parsing situations where the lexer and parser interact with
+        # each other or change states (i.e., manipulation of scope, lexer states, etc.).
+        #
+        # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
+        self.defaulted_states = {}
+        for state, actions in self.lr_action.items():
+            rules = list(actions.values())
+            if len(rules) == 1 and rules[0] < 0:
+                self.defaulted_states[state] = rules[0]
+
+    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
+    def lr0_closure(self, I):
+        self._add_count += 1
+
+        # Add everything in I to J
+        J = I[:]
+        didadd = True
+        while didadd:
+            didadd = False
+            for j in J:
+                for x in j.lr_after:
+                    if getattr(x, 'lr0_added', 0) == self._add_count:
+                        continue
+                    # Add B --> .G to J
+                    J.append(x.lr_next)
+                    x.lr0_added = self._add_count
+                    didadd = True
+
+        return J
+
+    # Compute the LR(0) goto function goto(I,X) where I is a set
+    # of LR(0) items and X is a grammar symbol.   This function is written
+    # in a way that guarantees uniqueness of the generated goto sets
+    # (i.e. the same goto set will never be returned as two different Python
+    # objects).  With uniqueness, we can later do fast set comparisons using
+    # id(obj) instead of element-wise comparison.
+
+    def lr0_goto(self, I, x):
+        # First we look for a previously cached entry
+        g = self.lr_goto_cache.get((id(I), x))
+        if g:
+            return g
+
+        # Now we generate the goto set in a way that guarantees uniqueness
+        # of the result
+
+        s = self.lr_goto_cache.get(x)
+        if not s:
+            s = {}
+            self.lr_goto_cache[x] = s
+
+        gs = []
+        for p in I:
+            n = p.lr_next
+            if n and n.lr_before == x:
+                s1 = s.get(id(n))
+                if not s1:
+                    s1 = {}
+                    s[id(n)] = s1
+                gs.append(n)
+                s = s1
+        g = s.get('$end')
+        if not g:
+            if gs:
+                g = self.lr0_closure(gs)
+                s['$end'] = g
+            else:
+                s['$end'] = gs
+        self.lr_goto_cache[(id(I), x)] = g
+        return g
+
+    # Compute the LR(0) sets of item function
+    def lr0_items(self):
+        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
+        i = 0
+        for I in C:
+            self.lr0_cidhash[id(I)] = i
+            i += 1
+
+        # Loop over the items in C and each grammar symbols
+        i = 0
+        while i < len(C):
+            I = C[i]
+            i += 1
+
+            # Collect all of the symbols that could possibly be in the goto(I,X) sets
+            asyms = {}
+            for ii in I:
+                for s in ii.usyms:
+                    asyms[s] = None
+
+            for x in asyms:
+                g = self.lr0_goto(I, x)
+                if not g or id(g) in self.lr0_cidhash:
+                    continue
+                self.lr0_cidhash[id(g)] = len(C)
+                C.append(g)
+
+        return C
+
+    # -----------------------------------------------------------------------------
+    #                       ==== LALR(1) Parsing ====
+    #
+    # LALR(1) parsing is almost exactly the same as SLR except that instead of
+    # relying upon Follow() sets when performing reductions, a more selective
+    # lookahead set that incorporates the state of the LR(0) machine is utilized.
+    # Thus, we mainly just have to focus on calculating the lookahead sets.
+    #
+    # The method used here is due to DeRemer and Pennelo (1982).
+    #
+    # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
+    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
+    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
+    #
+    # Further details can also be found in:
+    #
+    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
+    #      McGraw-Hill Book Company, (1985).
+    #
+    # -----------------------------------------------------------------------------
+
+    # -----------------------------------------------------------------------------
+    # compute_nullable_nonterminals()
+    #
+    # Creates a dictionary containing all of the non-terminals that might produce
+    # an empty production.
+    # -----------------------------------------------------------------------------
+
+    def compute_nullable_nonterminals(self):
+        nullable = set()
+        num_nullable = 0
+        while True:
+            for p in self.grammar.Productions[1:]:
+                if p.len == 0:
+                    nullable.add(p.name)
+                    continue
+                for t in p.prod:
+                    if t not in nullable:
+                        break
+                else:
+                    nullable.add(p.name)
+            if len(nullable) == num_nullable:
+                break
+            num_nullable = len(nullable)
+        return nullable
+
+    # -----------------------------------------------------------------------------
+    # find_nonterminal_trans(C)
+    #
+    # Given a set of LR(0) items, this functions finds all of the non-terminal
+    # transitions.    These are transitions in which a dot appears immediately before
+    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
+    # is the state number and N is the nonterminal symbol.
+    #
+    # The input C is the set of LR(0) items.
+    # -----------------------------------------------------------------------------
+
+    def find_nonterminal_transitions(self, C):
+        trans = []
+        for stateno, state in enumerate(C):
+            for p in state:
+                if p.lr_index < p.len - 1:
+                    t = (stateno, p.prod[p.lr_index+1])
+                    if t[1] in self.grammar.Nonterminals:
+                        if t not in trans:
+                            trans.append(t)
+        return trans
+
+    # -----------------------------------------------------------------------------
+    # dr_relation()
+    #
+    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
+    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
+    #
+    # Returns a list of terminals.
+    # -----------------------------------------------------------------------------
+
+    def dr_relation(self, C, trans, nullable):
+        dr_set = {}
+        state, N = trans
+        terms = []
+
+        g = self.lr0_goto(C[state], N)
+        for p in g:
+            if p.lr_index < p.len - 1:
+                a = p.prod[p.lr_index+1]
+                if a in self.grammar.Terminals:
+                    if a not in terms:
+                        terms.append(a)
+
+        # This extra bit is to handle the start state
+        if state == 0 and N == self.grammar.Productions[0].prod[0]:
+            terms.append('$end')
+
+        return terms
+
+    # -----------------------------------------------------------------------------
+    # reads_relation()
+    #
+    # Computes the READS() relation (p,A) READS (t,C).
+    # -----------------------------------------------------------------------------
+
+    def reads_relation(self, C, trans, empty):
+        # Look for empty transitions
+        rel = []
+        state, N = trans
+
+        g = self.lr0_goto(C[state], N)
+        j = self.lr0_cidhash.get(id(g), -1)
+        for p in g:
+            if p.lr_index < p.len - 1:
+                a = p.prod[p.lr_index + 1]
+                if a in empty:
+                    rel.append((j, a))
+
+        return rel
+
+    # -----------------------------------------------------------------------------
+    # compute_lookback_includes()
+    #
+    # Determines the lookback and includes relations
+    #
+    # LOOKBACK:
+    #
+    # This relation is determined by running the LR(0) state machine forward.
+    # For example, starting with a production "N : . A B C", we run it forward
+    # to obtain "N : A B C ."   We then build a relationship between this final
+    # state and the starting state.   These relationships are stored in a dictionary
+    # lookdict.
+    #
+    # INCLUDES:
+    #
+    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
+    #
+    # This relation is used to determine non-terminal transitions that occur
+    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
+    # if the following holds:
+    #
+    #       B -> LAT, where T -> epsilon and p' -L-> p
+    #
+    # L is essentially a prefix (which may be empty), T is a suffix that must be
+    # able to derive an empty string.  State p' must lead to state p with the string L.
+    #
+    # -----------------------------------------------------------------------------
+
+    def compute_lookback_includes(self, C, trans, nullable):
+        lookdict = {}          # Dictionary of lookback relations
+        includedict = {}       # Dictionary of include relations
+
+        # Make a dictionary of non-terminal transitions
+        dtrans = {}
+        for t in trans:
+            dtrans[t] = 1
+
+        # Loop over all transitions and compute lookbacks and includes
+        for state, N in trans:
+            lookb = []
+            includes = []
+            for p in C[state]:
+                if p.name != N:
+                    continue
+
+                # Okay, we have a name match.  We now follow the production all the way
+                # through the state machine until we get the . on the right hand side
+
+                lr_index = p.lr_index
+                j = state
+                while lr_index < p.len - 1:
+                    lr_index = lr_index + 1
+                    t = p.prod[lr_index]
+
+                    # Check to see if this symbol and state are a non-terminal transition
+                    if (j, t) in dtrans:
+                        # Yes.  Okay, there is some chance that this is an includes relation
+                        # the only way to know for certain is whether the rest of the
+                        # production derives empty
+
+                        li = lr_index + 1
+                        while li < p.len:
+                            if p.prod[li] in self.grammar.Terminals:
+                                break      # No forget it
+                            if p.prod[li] not in nullable:
+                                break
+                            li = li + 1
+                        else:
+                            # Appears to be a relation between (j,t) and (state,N)
+                            includes.append((j, t))
+
+                    g = self.lr0_goto(C[j], t)               # Go to next set
+                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state
+
+                # When we get here, j is the final state, now we have to locate the production
+                for r in C[j]:
+                    if r.name != p.name:
+                        continue
+                    if r.len != p.len:
+                        continue
+                    i = 0
+                    # This look is comparing a production ". A B C" with "A B C ."
+                    while i < r.lr_index:
+                        if r.prod[i] != p.prod[i+1]:
+                            break
+                        i = i + 1
+                    else:
+                        lookb.append((j, r))
+            for i in includes:
+                if i not in includedict:
+                    includedict[i] = []
+                includedict[i].append((state, N))
+            lookdict[(state, N)] = lookb
+
+        return lookdict, includedict
+
+    # -----------------------------------------------------------------------------
+    # compute_read_sets()
+    #
+    # Given a set of LR(0) items, this function computes the read sets.
+    #
+    # Inputs:  C        =  Set of LR(0) items
+    #          ntrans   = Set of nonterminal transitions
+    #          nullable = Set of empty transitions
+    #
+    # Returns a set containing the read sets
+    # -----------------------------------------------------------------------------
+
+    def compute_read_sets(self, C, ntrans, nullable):
+        FP = lambda x: self.dr_relation(C, x, nullable)
+        R =  lambda x: self.reads_relation(C, x, nullable)
+        F = digraph(ntrans, R, FP)
+        return F
+
+    # -----------------------------------------------------------------------------
+    # compute_follow_sets()
+    #
+    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
+    # and an include set, this function computes the follow sets
+    #
+    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
+    #
+    # Inputs:
+    #            ntrans     = Set of nonterminal transitions
+    #            readsets   = Readset (previously computed)
+    #            inclsets   = Include sets (previously computed)
+    #
+    # Returns a set containing the follow sets
+    # -----------------------------------------------------------------------------
+
+    def compute_follow_sets(self, ntrans, readsets, inclsets):
+        FP = lambda x: readsets[x]
+        R  = lambda x: inclsets.get(x, [])
+        F = digraph(ntrans, R, FP)
+        return F
+
+    # -----------------------------------------------------------------------------
+    # add_lookaheads()
+    #
+    # Attaches the lookahead symbols to grammar rules.
+    #
+    # Inputs:    lookbacks         -  Set of lookback relations
+    #            followset         -  Computed follow set
+    #
+    # This function directly attaches the lookaheads to productions contained
+    # in the lookbacks set
+    # -----------------------------------------------------------------------------
+
+    def add_lookaheads(self, lookbacks, followset):
+        for trans, lb in lookbacks.items():
+            # Loop over productions in lookback
+            for state, p in lb:
+                if state not in p.lookaheads:
+                    p.lookaheads[state] = []
+                f = followset.get(trans, [])
+                for a in f:
+                    if a not in p.lookaheads[state]:
+                        p.lookaheads[state].append(a)
+
+    # -----------------------------------------------------------------------------
+    # add_lalr_lookaheads()
+    #
+    # This function does all of the work of adding lookahead information for use
+    # with LALR parsing
+    # -----------------------------------------------------------------------------
+
+    def add_lalr_lookaheads(self, C):
+        # Determine all of the nullable nonterminals
+        nullable = self.compute_nullable_nonterminals()
+
+        # Find all non-terminal transitions
+        trans = self.find_nonterminal_transitions(C)
+
+        # Compute read sets
+        readsets = self.compute_read_sets(C, trans, nullable)
+
+        # Compute lookback/includes relations
+        lookd, included = self.compute_lookback_includes(C, trans, nullable)
+
+        # Compute LALR FOLLOW sets
+        followsets = self.compute_follow_sets(trans, readsets, included)
+
+        # Add all of the lookaheads
+        self.add_lookaheads(lookd, followsets)
+
+    # -----------------------------------------------------------------------------
+    # lr_parse_table()
+    #
+    # This function constructs the final LALR parse table.  Touch this code and die.
+    # -----------------------------------------------------------------------------
+    def lr_parse_table(self):
+        Productions = self.grammar.Productions
+        Precedence  = self.grammar.Precedence
+        goto   = self.lr_goto         # Goto array
+        action = self.lr_action       # Action array
+
+        actionp = {}                  # Action production array (temporary)
+
+        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
+        # This determines the number of states
+
+        C = self.lr0_items()
+        self.add_lalr_lookaheads(C)
+
+        # Build the parser table, state by state
+        for st, I in enumerate(C):
+            descrip = []
+            # Loop over each production in I
+            actlist = []              # List of actions
+            st_action  = {}
+            st_actionp = {}
+            st_goto    = {}
+
+            descrip.append(f'\nstate {st}\n')
+            for p in I:
+                descrip.append(f'    ({p.number}) {p}')
+
+            for p in I:
+                    if p.len == p.lr_index + 1:
+                        if p.name == "S'":
+                            # Start symbol. Accept!
+                            st_action['$end'] = 0
+                            st_actionp['$end'] = p
+                        else:
+                            # We are at the end of a production.  Reduce!
+                            laheads = p.lookaheads[st]
+                            for a in laheads:
+                                actlist.append((a, p, f'reduce using rule {p.number} ({p})'))
+                                r = st_action.get(a)
+                                if r is not None:
+                                    # Have a shift/reduce or reduce/reduce conflict
+                                    if r > 0:
+                                        # Need to decide on shift or reduce here
+                                        # By default we favor shifting. Need to add
+                                        # some precedence rules here.
+
+                                        # Shift precedence comes from the token
+                                        sprec, slevel = Precedence.get(a, ('right', 0))
+
+                                        # Reduce precedence comes from rule being reduced (p)
+                                        rprec, rlevel = Productions[p.number].prec
+
+                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
+                                            # We really need to reduce here.
+                                            st_action[a] = -p.number
+                                            st_actionp[a] = p
+                                            if not slevel and not rlevel:
+                                                descrip.append(f'  ! shift/reduce conflict for {a} resolved as reduce')
+                                                self.sr_conflicts.append((st, a, 'reduce'))
+                                            Productions[p.number].reduced += 1
+                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
+                                            st_action[a] = None
+                                        else:
+                                            # Hmmm. Guess we'll keep the shift
+                                            if not rlevel:
+                                                descrip.append(f'  ! shift/reduce conflict for {a} resolved as shift')
+                                                self.sr_conflicts.append((st, a, 'shift'))
+                                    elif r <= 0:
+                                        # Reduce/reduce conflict.   In this case, we favor the rule
+                                        # that was defined first in the grammar file
+                                        oldp = Productions[-r]
+                                        pp = Productions[p.number]
+                                        if oldp.line > pp.line:
+                                            st_action[a] = -p.number
+                                            st_actionp[a] = p
+                                            chosenp, rejectp = pp, oldp
+                                            Productions[p.number].reduced += 1
+                                            Productions[oldp.number].reduced -= 1
+                                        else:
+                                            chosenp, rejectp = oldp, pp
+                                        self.rr_conflicts.append((st, chosenp, rejectp))
+                                        descrip.append('  ! reduce/reduce conflict for %s resolved using rule %d (%s)' % 
+                                                       (a, st_actionp[a].number, st_actionp[a]))
+                                    else:
+                                        raise LALRError(f'Unknown conflict in state {st}')
+                                else:
+                                    st_action[a] = -p.number
+                                    st_actionp[a] = p
+                                    Productions[p.number].reduced += 1
+                    else:
+                        i = p.lr_index
+                        a = p.prod[i+1]       # Get symbol right after the "."
+                        if a in self.grammar.Terminals:
+                            g = self.lr0_goto(I, a)
+                            j = self.lr0_cidhash.get(id(g), -1)
+                            if j >= 0:
+                                # We are in a shift state
+                                actlist.append((a, p, f'shift and go to state {j}'))
+                                r = st_action.get(a)
+                                if r is not None:
+                                    # Whoa have a shift/reduce or shift/shift conflict
+                                    if r > 0:
+                                        if r != j:
+                                            raise LALRError(f'Shift/shift conflict in state {st}')
+                                    elif r <= 0:
+                                        # Do a precedence check.
+                                        #   -  if precedence of reduce rule is higher, we reduce.
+                                        #   -  if precedence of reduce is same and left assoc, we reduce.
+                                        #   -  otherwise we shift
+                                        rprec, rlevel = Productions[st_actionp[a].number].prec
+                                        sprec, slevel = Precedence.get(a, ('right', 0))
+                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
+                                            # We decide to shift here... highest precedence to shift
+                                            Productions[st_actionp[a].number].reduced -= 1
+                                            st_action[a] = j
+                                            st_actionp[a] = p
+                                            if not rlevel:
+                                                descrip.append(f'  ! shift/reduce conflict for {a} resolved as shift')
+                                                self.sr_conflicts.append((st, a, 'shift'))
+                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
+                                            st_action[a] = None
+                                        else:
+                                            # Hmmm. Guess we'll keep the reduce
+                                            if not slevel and not rlevel:
+                                                descrip.append(f'  ! shift/reduce conflict for {a} resolved as reduce')
+                                                self.sr_conflicts.append((st, a, 'reduce'))
+
+                                    else:
+                                        raise LALRError(f'Unknown conflict in state {st}')
+                                else:
+                                    st_action[a] = j
+                                    st_actionp[a] = p
+
+            # Print the actions associated with each terminal
+            _actprint = {}
+            for a, p, m in actlist:
+                if a in st_action:
+                    if p is st_actionp[a]:
+                        descrip.append(f'    {a:<15s} {m}')
+                        _actprint[(a, m)] = 1
+            descrip.append('')
+
+            # Construct the goto table for this state
+            nkeys = {}
+            for ii in I:
+                for s in ii.usyms:
+                    if s in self.grammar.Nonterminals:
+                        nkeys[s] = None
+            for n in nkeys:
+                g = self.lr0_goto(I, n)
+                j = self.lr0_cidhash.get(id(g), -1)
+                if j >= 0:
+                    st_goto[n] = j
+                    descrip.append(f'    {n:<30s} shift and go to state {j}')
+
+            action[st] = st_action
+            actionp[st] = st_actionp
+            goto[st] = st_goto
+            self.state_descriptions[st] = '\n'.join(descrip)
+
+    # ----------------------------------------------------------------------
+    # Debugging output.   Printing the LRTable object will produce a listing
+    # of all of the states, conflicts, and other details.
+    # ----------------------------------------------------------------------
+    def __str__(self):
+        out = []
+        for descrip in self.state_descriptions.values():
+            out.append(descrip)
+            
+        if self.sr_conflicts or self.rr_conflicts:
+            out.append('\nConflicts:\n')
+
+            for state, tok, resolution in self.sr_conflicts:
+                out.append(f'shift/reduce conflict for {tok} in state {state} resolved as {resolution}')
+
+            already_reported = set()
+            for state, rule, rejected in self.rr_conflicts:
+                if (state, id(rule), id(rejected)) in already_reported:
+                    continue
+                out.append(f'reduce/reduce conflict in state {state} resolved using rule {rule}')
+                out.append(f'rejected rule ({rejected}) in state {state}')
+                already_reported.add((state, id(rule), id(rejected)))
+
+            warned_never = set()
+            for state, rule, rejected in self.rr_conflicts:
+                if not rejected.reduced and (rejected not in warned_never):
+                    out.append(f'Rule ({rejected}) is never reduced')
+                    warned_never.add(rejected)
+
+        return '\n'.join(out)
+
+# Collect grammar rules from a function
+def _collect_grammar_rules(func):
+    grammar = []
+    while func:
+        prodname = func.__name__
+        unwrapped = inspect.unwrap(func)
+        filename = unwrapped.__code__.co_filename
+        lineno = unwrapped.__code__.co_firstlineno
+        for rule, lineno in zip(func.rules, range(lineno+len(func.rules)-1, 0, -1)):
+            syms = rule.split()
+            if syms[1:2] == [':'] or syms[1:2] == ['::=']:
+                grammar.append((func, filename, lineno, syms[0], syms[2:]))
+            else:
+                grammar.append((func, filename, lineno, prodname, syms))
+        func = getattr(func, 'next_func', None)
+
+    return grammar
+
+class ParserMetaDict(dict):
+    '''
+    Dictionary that allows decorated grammar rule functions to be overloaded
+    '''
+    def __setitem__(self, key, value):
+        if key in self and callable(value) and hasattr(value, 'rules'):
+            value.next_func = self[key]
+            if not hasattr(value.next_func, 'rules'):
+                raise GrammarError(f'Redefinition of {key}. Perhaps an earlier {key} is missing @_')
+        super().__setitem__(key, value)
+    
+    def __getitem__(self, key):
+        if key not in self and key.isupper() and key[:1] != '_':
+            return key.upper()
+        else:
+            return super().__getitem__(key)
+
+class ParserMeta(type):
+    @classmethod
+    def __prepare__(meta, *args, **kwargs):
+        d = ParserMetaDict()
+        def _(rule, *extra):
+            rules = [rule, *extra]
+            def decorate(func):
+                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+                return func
+            return decorate
+        d['_'] = _
+        return d
+
+    def __new__(meta, clsname, bases, attributes):
+        del attributes['_']
+        cls = super().__new__(meta, clsname, bases, attributes)
+        cls._build(list(attributes.items()))
+        return cls
+
+class Parser(metaclass=ParserMeta):
+    # Logging object where debugging/diagnostic messages are sent
+    log = SlyLogger(sys.stderr)     
+
+    # Debugging filename where parsetab.out data can be written
+    debugfile = None
+
+    @classmethod
+    def __validate_tokens(cls):
+        if not hasattr(cls, 'tokens'):
+            cls.log.error('No token list is defined')
+            return False
+
+        if not cls.tokens:
+            cls.log.error('tokens is empty')
+            return False
+
+        if 'error' in cls.tokens:
+            cls.log.error("Illegal token name 'error'. Is a reserved word")
+            return False
+
+        return True
+
+    @classmethod
+    def __validate_precedence(cls):
+        if not hasattr(cls, 'precedence'):
+            cls.__preclist = []
+            return True
+
+        preclist = []
+        if not isinstance(cls.precedence, (list, tuple)):
+            cls.log.error('precedence must be a list or tuple')
+            return False
+
+        for level, p in enumerate(cls.precedence, start=1):
+            if not isinstance(p, (list, tuple)):
+                cls.log.error(f'Bad precedence table entry {p!r}. Must be a list or tuple')
+                return False
+
+            if len(p) < 2:
+                cls.log.error(f'Malformed precedence entry {p!r}. Must be (assoc, term, ..., term)')
+                return False
+
+            if not all(isinstance(term, str) for term in p):
+                cls.log.error('precedence items must be strings')
+                return False
+            
+            assoc = p[0]
+            preclist.extend((term, assoc, level) for term in p[1:])
+
+        cls.__preclist = preclist
+        return True
+
+    @classmethod
+    def __validate_specification(cls):
+        '''
+        Validate various parts of the grammar specification
+        '''
+        if not cls.__validate_tokens():
+            return False
+        if not cls.__validate_precedence():
+            return False
+        return True
+
+    @classmethod
+    def __build_grammar(cls, rules):
+        '''
+        Build the grammar from the grammar rules
+        '''
+        grammar_rules = []
+        errors = ''
+        # Check for non-empty symbols
+        if not rules:
+            raise YaccError('No grammar rules are defined')
+
+        grammar = Grammar(cls.tokens)
+
+        # Set the precedence level for terminals
+        for term, assoc, level in cls.__preclist:
+            try:
+                grammar.set_precedence(term, assoc, level)
+            except GrammarError as e:
+                errors += f'{e}\n'
+
+        for name, func in rules:
+            try:
+                parsed_rule = _collect_grammar_rules(func)
+                for pfunc, rulefile, ruleline, prodname, syms in parsed_rule:
+                    try:
+                        grammar.add_production(prodname, syms, pfunc, rulefile, ruleline)
+                    except GrammarError as e:
+                        errors += f'{e}\n'
+            except SyntaxError as e:
+                errors += f'{e}\n'
+        try:
+            grammar.set_start(getattr(cls, 'start', None))
+        except GrammarError as e:
+            errors += f'{e}\n'
+
+        undefined_symbols = grammar.undefined_symbols()
+        for sym, prod in undefined_symbols:
+            errors += '%s:%d: Symbol %r used, but not defined as a token or a rule\n' % (prod.file, prod.line, sym)
+
+        unused_terminals = grammar.unused_terminals()
+        if unused_terminals:
+            unused_str = '{' + ','.join(unused_terminals) + '}'
+            cls.log.warning(f'Token{"(s)" if len(unused_terminals) >1 else ""} {unused_str} defined, but not used')
+
+        unused_rules = grammar.unused_rules()
+        for prod in unused_rules:
+            cls.log.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
+
+        if len(unused_terminals) == 1:
+            cls.log.warning('There is 1 unused token')
+        if len(unused_terminals) > 1:
+            cls.log.warning('There are %d unused tokens', len(unused_terminals))
+
+        if len(unused_rules) == 1:
+            cls.log.warning('There is 1 unused rule')
+        if len(unused_rules) > 1:
+            cls.log.warning('There are %d unused rules', len(unused_rules))
+
+        unreachable = grammar.find_unreachable()
+        for u in unreachable:
+           cls.log.warning('Symbol %r is unreachable', u)
+
+        if len(undefined_symbols) == 0:
+            infinite = grammar.infinite_cycles()
+            for inf in infinite:
+                errors += 'Infinite recursion detected for symbol %r\n' % inf
+
+        unused_prec = grammar.unused_precedence()
+        for term, assoc in unused_prec:
+            errors += 'Precedence rule %r defined for unknown symbol %r\n' % (assoc, term)
+
+        cls._grammar = grammar
+        if errors:
+            raise YaccError('Unable to build grammar.\n'+errors)
+
+    @classmethod
+    def __build_lrtables(cls):
+        '''
+        Build the LR Parsing tables from the grammar
+        '''
+        lrtable = LRTable(cls._grammar)
+        num_sr = len(lrtable.sr_conflicts)
+
+        # Report shift/reduce and reduce/reduce conflicts
+        if num_sr != getattr(cls, 'expected_shift_reduce', None):
+            if num_sr == 1:
+                cls.log.warning('1 shift/reduce conflict')
+            elif num_sr > 1:
+                cls.log.warning('%d shift/reduce conflicts', num_sr)
+
+        num_rr = len(lrtable.rr_conflicts)
+        if num_rr != getattr(cls, 'expected_reduce_reduce', None):
+            if num_rr == 1:
+                cls.log.warning('1 reduce/reduce conflict')
+            elif num_rr > 1:
+                cls.log.warning('%d reduce/reduce conflicts', num_rr)
+
+        cls._lrtable = lrtable
+        return True
+
+    @classmethod
+    def __collect_rules(cls, definitions):
+        '''
+        Collect all of the tagged grammar rules
+        '''
+        rules = [ (name, value) for name, value in definitions
+                  if callable(value) and hasattr(value, 'rules') ]
+        return rules
+
+    # ----------------------------------------------------------------------
+    # Build the LALR(1) tables. definitions is a list of (name, item) tuples
+    # of all definitions provided in the class, listed in the order in which
+    # they were defined.  This method is triggered by a metaclass.
+    # ----------------------------------------------------------------------
+    @classmethod
+    def _build(cls, definitions):
+        if vars(cls).get('_build', False):
+            return
+
+        # Collect all of the grammar rules from the class definition
+        rules = cls.__collect_rules(definitions)
+
+        # Validate other parts of the grammar specification
+        if not cls.__validate_specification():
+            raise YaccError('Invalid parser specification')
+
+        # Build the underlying grammar object
+        cls.__build_grammar(rules)
+
+        # Build the LR tables
+        if not cls.__build_lrtables():
+            raise YaccError('Can\'t build parsing tables')
+
+        if cls.debugfile:
+            with open(cls.debugfile, 'w') as f:
+                f.write(str(cls._grammar))
+                f.write('\n')
+                f.write(str(cls._lrtable))
+            cls.log.info('Parser debugging for %s written to %s', cls.__qualname__, cls.debugfile)
+
+    # ----------------------------------------------------------------------
+    # Parsing Support.  This is the parsing runtime that users use to
+    # ----------------------------------------------------------------------
+    def error(self, token):
+        '''
+        Default error handling function.  This may be subclassed.
+        '''
+        if token:
+            lineno = getattr(token, 'lineno', 0)
+            if lineno:
+                sys.stderr.write(f'sly: Syntax error at line {lineno}, token={token.type}\n')
+            else:
+                sys.stderr.write(f'sly: Syntax error, token={token.type}')
+        else:
+            sys.stderr.write('sly: Parse error in input. EOF\n')
+ 
+    def errok(self):
+        '''
+        Clear the error status
+        '''
+        self.errorok = True
+
+    def restart(self):
+        '''
+        Force the parser to restart from a fresh state. Clears the statestack
+        '''
+        del self.statestack[:]
+        del self.symstack[:]
+        sym = YaccSymbol()
+        sym.type = '$end'
+        self.symstack.append(sym)
+        self.statestack.append(0)
+        self.state = 0
+
+    def parse(self, tokens):
+        '''
+        Parse the given input tokens.
+        '''
+        lookahead = None                                  # Current lookahead symbol
+        lookaheadstack = []                               # Stack of lookahead symbols
+        actions = self._lrtable.lr_action                 # Local reference to action table (to avoid lookup on self.)
+        goto    = self._lrtable.lr_goto                   # Local reference to goto table (to avoid lookup on self.)
+        prod    = self._grammar.Productions               # Local reference to production list (to avoid lookup on self.)
+        defaulted_states = self._lrtable.defaulted_states # Local reference to defaulted states
+        pslice  = YaccProduction(None)                    # Production object passed to grammar rules
+        errorcount = 0                                    # Used during error recovery
+
+        # Set up the state and symbol stacks
+        self.tokens = tokens
+        self.statestack = statestack = []                 # Stack of parsing states
+        self.symstack = symstack = []                     # Stack of grammar symbols
+        pslice._stack = symstack                           # Associate the stack with the production
+        self.restart()
+
+        errtoken   = None                                 # Err token
+        while True:
+            # Get the next symbol on the input.  If a lookahead symbol
+            # is already set, we just use that. Otherwise, we'll pull
+            # the next token off of the lookaheadstack or from the lexer
+            if self.state not in defaulted_states:
+                if not lookahead:
+                    if not lookaheadstack:
+                        lookahead = next(tokens, None)  # Get the next token
+                    else:
+                        lookahead = lookaheadstack.pop()
+                    if not lookahead:
+                        lookahead = YaccSymbol()
+                        lookahead.type = '$end'
+
+                # Check the action table
+                ltype = lookahead.type
+                t = actions[self.state].get(ltype)
+            else:
+                t = defaulted_states[self.state]
+
+            if t is not None:
+                if t > 0:
+                    # shift a symbol on the stack
+                    statestack.append(t)
+                    self.state = t
+
+                    symstack.append(lookahead)
+                    lookahead = None
+
+                    # Decrease error count on successful shift
+                    if errorcount:
+                        errorcount -= 1
+                    continue
+
+                if t < 0:
+                    # reduce a symbol on the stack, emit a production
+                    self.production = p = prod[-t]
+                    pname = p.name
+                    plen  = p.len
+                    pslice._namemap = p.namemap
+
+                    # Call the production function
+                    pslice._slice = symstack[-plen:] if plen else []
+
+                    sym = YaccSymbol()
+                    sym.type = pname       
+                    value = p.func(self, pslice)
+                    if value is pslice:
+                        value = (pname, *(s.value for s in pslice._slice))
+                    sym.value = value
+                    if plen:
+                        del symstack[-plen:]
+                        del statestack[-plen:]
+
+                    symstack.append(sym)
+                    self.state = goto[statestack[-1]][pname]
+                    statestack.append(self.state)
+                    continue
+
+                if t == 0:
+                    n = symstack[-1]
+                    result = getattr(n, 'value', None)
+                    return result
+
+            if t is None:
+                # We have some kind of parsing error here.  To handle
+                # this, we are going to push the current token onto
+                # the tokenstack and replace it with an 'error' token.
+                # If there are any synchronization rules, they may
+                # catch it.
+                #
+                # In addition to pushing the error token, we call call
+                # the user defined error() function if this is the
+                # first syntax error.  This function is only called if
+                # errorcount == 0.
+                if errorcount == 0 or self.errorok:
+                    errorcount = ERROR_COUNT
+                    self.errorok = False
+                    if lookahead.type == '$end':
+                        errtoken = None               # End of file!
+                    else:
+                        errtoken = lookahead
+
+                    tok = self.error(errtoken)
+                    if tok:
+                        # User must have done some kind of panic
+                        # mode recovery on their own.  The
+                        # returned token is the next lookahead
+                        lookahead = tok
+                        self.errorok = True
+                        continue
+                    else:
+                        # If at EOF. We just return. Basically dead.
+                        if not errtoken:
+                            return
+                else:
+                    # Reset the error count.  Unsuccessful token shifted
+                    errorcount = ERROR_COUNT
+
+                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
+                # entire parse has been rolled back and we're completely hosed.   The token is
+                # discarded and we just keep going.
+
+                if len(statestack) <= 1 and lookahead.type != '$end':
+                    lookahead = None
+                    self.state = 0
+                    # Nuke the lookahead stack
+                    del lookaheadstack[:]
+                    continue
+
+                # case 2: the statestack has a couple of entries on it, but we're
+                # at the end of the file. nuke the top entry and generate an error token
+
+                # Start nuking entries on the stack
+                if lookahead.type == '$end':
+                    # Whoa. We're really hosed here. Bail out
+                    return
+
+                if lookahead.type != 'error':
+                    sym = symstack[-1]
+                    if sym.type == 'error':
+                        # Hmmm. Error is on top of stack, we'll just nuke input
+                        # symbol and continue
+                        lookahead = None
+                        continue
+
+                    # Create the error symbol for the first time and make it the new lookahead symbol
+                    t = YaccSymbol()
+                    t.type = 'error'
+
+                    if hasattr(lookahead, 'lineno'):
+                        t.lineno = lookahead.lineno
+                    if hasattr(lookahead, 'index'):
+                        t.index = lookahead.index
+                    t.value = lookahead
+                    lookaheadstack.append(lookahead)
+                    lookahead = t
+                else:
+                    sym = symstack.pop()
+                    statestack.pop()
+                    self.state = statestack[-1]
+                continue
+
+            # Call an error function here
+            raise RuntimeError('sly: internal parser error!!!\n')
author	Daniel Friesel <daniel.friesel@uos.de>	2019-12-12 17:33:13 +0100
committer	Daniel Friesel <daniel.friesel@uos.de>	2019-12-12 17:33:13 +0100
commit	ff6fc70456354b23663bee037c5e737a2b437ca8 (patch)
tree	57397e592e664bf5f20b72032be887557bc2df08
parent	575a33b819d29ee99fa7d4264a943043d1d64032 (diff)