Diffstat (limited to 'lib/sly')
-rw-r--r--  lib/sly/__init__.py    3
-rw-r--r--  lib/sly/ast.py         9
-rw-r--r--  lib/sly/docparse.py   15
-rw-r--r--  lib/sly/lex.py       178
-rw-r--r--  lib/sly/yacc.py      867
5 files changed, 614 insertions, 458 deletions
diff --git a/lib/sly/__init__.py b/lib/sly/__init__.py
index 3c1e708..3a2d92e 100644
--- a/lib/sly/__init__.py
+++ b/lib/sly/__init__.py
@@ -1,6 +1,5 @@
-
from .lex import *
from .yacc import *
__version__ = "0.4"
-__all__ = [ *lex.__all__, *yacc.__all__ ]
+__all__ = [*lex.__all__, *yacc.__all__]
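The re-exports above are what make the package root usable directly. A minimal
sketch of the import surface, assuming SLY 0.4 as vendored here:

import sly
from sly import Lexer, Parser   # re-exported via __all__ above

print(sly.__version__)          # "0.4"
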
diff --git a/lib/sly/ast.py b/lib/sly/ast.py
index 7b79ac5..05802bd 100644
--- a/lib/sly/ast.py
+++ b/lib/sly/ast.py
@@ -1,25 +1,24 @@
# sly/ast.py
import sys
+
class AST(object):
-
@classmethod
def __init_subclass__(cls, **kwargs):
mod = sys.modules[cls.__module__]
- if not hasattr(cls, '__annotations__'):
+ if not hasattr(cls, "__annotations__"):
return
hints = list(cls.__annotations__.items())
def __init__(self, *args, **kwargs):
if len(hints) != len(args):
- raise TypeError(f'Expected {len(hints)} arguments')
+ raise TypeError(f"Expected {len(hints)} arguments")
for arg, (name, val) in zip(args, hints):
if isinstance(val, str):
val = getattr(mod, val)
if not isinstance(arg, val):
- raise TypeError(f'{name} argument must be {val}')
+ raise TypeError(f"{name} argument must be {val}")
setattr(self, name, arg)
cls.__init__ = __init__
-
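For context, the __init__ generated above turns each class annotation into a
required, type-checked positional argument. A minimal usage sketch (BinOp and
its fields are hypothetical illustrations, not part of this change):

from sly.ast import AST

class BinOp(AST):
    # Each annotation becomes one positional argument, checked by the
    # generated __init__ against the annotated type.
    op: str
    left: int
    right: int

node = BinOp('+', 1, 2)
print(node.op, node.left, node.right)   # + 1 2

try:
    BinOp('+', 1)                       # wrong arity
except TypeError as e:
    print(e)                            # Expected 3 arguments
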
diff --git a/lib/sly/docparse.py b/lib/sly/docparse.py
index d5a83ce..0f35c97 100644
--- a/lib/sly/docparse.py
+++ b/lib/sly/docparse.py
@@ -2,7 +2,8 @@
#
# Support doc-string parsing classes
-__all__ = [ 'DocParseMeta' ]
+__all__ = ["DocParseMeta"]
+
class DocParseMeta(type):
'''
@@ -44,17 +45,17 @@ class DocParseMeta(type):
@staticmethod
def __new__(meta, clsname, bases, clsdict):
- if '__doc__' in clsdict:
+ if "__doc__" in clsdict:
lexer = meta.lexer()
parser = meta.parser()
lexer.cls_name = parser.cls_name = clsname
- lexer.cls_qualname = parser.cls_qualname = clsdict['__qualname__']
- lexer.cls_module = parser.cls_module = clsdict['__module__']
- parsedict = parser.parse(lexer.tokenize(clsdict['__doc__']))
- assert isinstance(parsedict, dict), 'Parser must return a dictionary'
+ lexer.cls_qualname = parser.cls_qualname = clsdict["__qualname__"]
+ lexer.cls_module = parser.cls_module = clsdict["__module__"]
+ parsedict = parser.parse(lexer.tokenize(clsdict["__doc__"]))
+ assert isinstance(parsedict, dict), "Parser must return a dictionary"
clsdict.update(parsedict)
return super().__new__(meta, clsname, bases, clsdict)
@classmethod
def __init_subclass__(cls):
- assert hasattr(cls, 'parser') and hasattr(cls, 'lexer')
+ assert hasattr(cls, "parser") and hasattr(cls, "lexer")
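DocParseMeta wires a lexer/parser pair into class creation: the class docstring
is tokenized, parsed into a dictionary, and merged into the class namespace. A
runnable sketch under those semantics (KVLexer, KVParser, KVDocMeta, and Config
are hypothetical names, not part of this change):

from sly import Lexer, Parser
from sly.docparse import DocParseMeta

class KVLexer(Lexer):
    tokens = { NAME, NUMBER }
    ignore = ' \t\n'
    literals = { '=' }
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'

class KVParser(Parser):
    tokens = KVLexer.tokens

    @_('pairs pair')
    def pairs(self, p):
        p.pairs.update([p.pair])
        return p.pairs

    @_('pair')
    def pairs(self, p):
        return dict([p.pair])

    @_('NAME "=" NUMBER')
    def pair(self, p):
        return (p.NAME, int(p.NUMBER))

class KVDocMeta(DocParseMeta):
    lexer = KVLexer
    parser = KVParser

class Config(metaclass=KVDocMeta):
    '''
    x = 1
    y = 2
    '''

print(Config.x, Config.y)   # 1 2
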
diff --git a/lib/sly/lex.py b/lib/sly/lex.py
index 246dd9e..0ab0160 100644
--- a/lib/sly/lex.py
+++ b/lib/sly/lex.py
@@ -31,51 +31,63 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
-__all__ = ['Lexer', 'LexerStateChange']
+__all__ = ["Lexer", "LexerStateChange"]
import re
import copy
+
class LexError(Exception):
- '''
+ """
Exception raised if an invalid character is encountered and no default
error handler function is defined. The .text attribute of the exception
contains all remaining untokenized text. The .error_index is the index
location of the error.
- '''
+ """
+
def __init__(self, message, text, error_index):
self.args = (message,)
self.text = text
self.error_index = error_index
+
class PatternError(Exception):
- '''
+ """
Exception raised if there's some kind of problem with the specified
regex patterns in the lexer.
- '''
+ """
+
pass
+
class LexerBuildError(Exception):
- '''
+ """
Exception raised if there's some sort of problem building the lexer.
- '''
+ """
+
pass
+
class LexerStateChange(Exception):
- '''
+ """
Exception raised to force a lexing state change
- '''
+ """
+
def __init__(self, newstate, tok=None):
self.newstate = newstate
self.tok = tok
+
class Token(object):
- '''
+ """
Representation of a single token.
- '''
- __slots__ = ('type', 'value', 'lineno', 'index')
+ """
+
+ __slots__ = ("type", "value", "lineno", "index")
+
def __repr__(self):
- return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
+ return f"Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})"
+
class TokenStr(str):
@staticmethod
@@ -95,35 +107,38 @@ class TokenStr(str):
if self.remap is not None:
self.remap[self.key, key] = self.key
+
class _Before:
def __init__(self, tok, pattern):
self.tok = tok
self.pattern = pattern
+
class LexerMetaDict(dict):
- '''
+ """
Special dictionary that prohibits duplicate definitions in lexer specifications.
- '''
+ """
+
def __init__(self):
- self.before = { }
- self.delete = [ ]
- self.remap = { }
+ self.before = {}
+ self.delete = []
+ self.remap = {}
def __setitem__(self, key, value):
if isinstance(value, str):
value = TokenStr(value, key, self.remap)
-
+
if isinstance(value, _Before):
self.before[key] = value.tok
value = TokenStr(value.pattern, key, self.remap)
-
+
if key in self and not isinstance(value, property):
prior = self[key]
if isinstance(prior, str):
if callable(value):
value.pattern = prior
else:
- raise AttributeError(f'Name {key} redefined')
+ raise AttributeError(f"Name {key} redefined")
super().__setitem__(key, value)
@@ -135,41 +150,47 @@ class LexerMetaDict(dict):
return super().__delitem__(key)
def __getitem__(self, key):
- if key not in self and key.split('ignore_')[-1].isupper() and key[:1] != '_':
+ if key not in self and key.split("ignore_")[-1].isupper() and key[:1] != "_":
return TokenStr(key, key, self.remap)
else:
return super().__getitem__(key)
+
class LexerMeta(type):
- '''
+ """
Metaclass for collecting lexing rules
- '''
+ """
+
@classmethod
def __prepare__(meta, name, bases):
d = LexerMetaDict()
def _(pattern, *extra):
patterns = [pattern, *extra]
+
def decorate(func):
- pattern = '|'.join(f'({pat})' for pat in patterns )
- if hasattr(func, 'pattern'):
- func.pattern = pattern + '|' + func.pattern
+ pattern = "|".join(f"({pat})" for pat in patterns)
+ if hasattr(func, "pattern"):
+ func.pattern = pattern + "|" + func.pattern
else:
func.pattern = pattern
return func
+
return decorate
- d['_'] = _
- d['before'] = _Before
+ d["_"] = _
+ d["before"] = _Before
return d
def __new__(meta, clsname, bases, attributes):
- del attributes['_']
- del attributes['before']
+ del attributes["_"]
+ del attributes["before"]
# Create attributes for use in the actual class body
- cls_attributes = { str(key): str(val) if isinstance(val, TokenStr) else val
- for key, val in attributes.items() }
+ cls_attributes = {
+ str(key): str(val) if isinstance(val, TokenStr) else val
+ for key, val in attributes.items()
+ }
cls = super().__new__(meta, clsname, bases, cls_attributes)
# Attach various metadata to the class
@@ -180,11 +201,12 @@ class LexerMeta(type):
cls._build()
return cls
+
class Lexer(metaclass=LexerMeta):
# These attributes may be defined in subclasses
tokens = set()
literals = set()
- ignore = ''
+ ignore = ""
reflags = 0
regex_module = re
@@ -214,7 +236,7 @@ class Lexer(metaclass=LexerMeta):
# Such functions can be created with the @_ decorator or by defining
# function with the same name as a previously defined string.
#
- # This function is responsible for keeping rules in order.
+ # This function is responsible for keeping rules in order.
# Collect all previous rules from base classes
rules = []
@@ -222,15 +244,21 @@ class Lexer(metaclass=LexerMeta):
for base in cls.__bases__:
if isinstance(base, LexerMeta):
rules.extend(base._rules)
-
+
# Dictionary of previous rules
existing = dict(rules)
for key, value in cls._attributes.items():
- if (key in cls._token_names) or key.startswith('ignore_') or hasattr(value, 'pattern'):
- if callable(value) and not hasattr(value, 'pattern'):
- raise LexerBuildError(f"function {value} doesn't have a regex pattern")
-
+ if (
+ (key in cls._token_names)
+ or key.startswith("ignore_")
+ or hasattr(value, "pattern")
+ ):
+ if callable(value) and not hasattr(value, "pattern"):
+ raise LexerBuildError(
+ f"function {value} doesn't have a regex pattern"
+ )
+
if key in existing:
# The definition matches something that already existed in the base class.
# We replace it, but keep the original ordering
@@ -252,21 +280,27 @@ class Lexer(metaclass=LexerMeta):
rules.append((key, value))
existing[key] = value
- elif isinstance(value, str) and not key.startswith('_') and key not in {'ignore', 'literals'}:
- raise LexerBuildError(f'{key} does not match a name in tokens')
+ elif (
+ isinstance(value, str)
+ and not key.startswith("_")
+ and key not in {"ignore", "literals"}
+ ):
+ raise LexerBuildError(f"{key} does not match a name in tokens")
# Apply deletion rules
- rules = [ (key, value) for key, value in rules if key not in cls._delete ]
+ rules = [(key, value) for key, value in rules if key not in cls._delete]
cls._rules = rules
@classmethod
def _build(cls):
- '''
+ """
Build the lexer object from the collected tokens and regular expressions.
Validate the rules to make sure they look sane.
- '''
- if 'tokens' not in vars(cls):
- raise LexerBuildError(f'{cls.__qualname__} class does not define a tokens attribute')
+ """
+ if "tokens" not in vars(cls):
+ raise LexerBuildError(
+ f"{cls.__qualname__} class does not define a tokens attribute"
+ )
# Pull definitions created for any parent classes
cls._token_names = cls._token_names | set(cls.tokens)
@@ -282,17 +316,17 @@ class Lexer(metaclass=LexerMeta):
remapped_toks = set()
for d in cls._remapping.values():
remapped_toks.update(d.values())
-
+
undefined = remapped_toks - set(cls._token_names)
if undefined:
- missing = ', '.join(undefined)
- raise LexerBuildError(f'{missing} not included in token(s)')
+ missing = ", ".join(undefined)
+ raise LexerBuildError(f"{missing} not included in token(s)")
cls._collect_rules()
parts = []
for tokname, value in cls._rules:
- if tokname.startswith('ignore_'):
+ if tokname.startswith("ignore_"):
tokname = tokname[7:]
cls._ignored_tokens.add(tokname)
@@ -301,20 +335,20 @@ class Lexer(metaclass=LexerMeta):
elif callable(value):
cls._token_funcs[tokname] = value
- pattern = getattr(value, 'pattern')
+ pattern = getattr(value, "pattern")
# Form the regular expression component
- part = f'(?P<{tokname}>{pattern})'
+ part = f"(?P<{tokname}>{pattern})"
# Make sure the individual regex compiles properly
try:
cpat = cls.regex_module.compile(part, cls.reflags)
except Exception as e:
- raise PatternError(f'Invalid regex for token {tokname}') from e
+ raise PatternError(f"Invalid regex for token {tokname}") from e
# Verify that the pattern doesn't match the empty string
- if cpat.match(''):
- raise PatternError(f'Regex for token {tokname} matches empty input')
+ if cpat.match(""):
+ raise PatternError(f"Regex for token {tokname} matches empty input")
parts.append(part)
@@ -322,43 +356,45 @@ class Lexer(metaclass=LexerMeta):
return
# Form the master regular expression
- #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+ # previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
# cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
- cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags)
+ cls._master_re = cls.regex_module.compile("|".join(parts), cls.reflags)
# Verify that that ignore and literals specifiers match the input type
if not isinstance(cls.ignore, str):
- raise LexerBuildError('ignore specifier must be a string')
+ raise LexerBuildError("ignore specifier must be a string")
if not all(isinstance(lit, str) for lit in cls.literals):
- raise LexerBuildError('literals must be specified as strings')
+ raise LexerBuildError("literals must be specified as strings")
def begin(self, cls):
- '''
+ """
Begin a new lexer state
- '''
+ """
assert isinstance(cls, LexerMeta), "state must be a subclass of Lexer"
if self.__set_state:
self.__set_state(cls)
self.__class__ = cls
def push_state(self, cls):
- '''
+ """
Push a new lexer state onto the stack
- '''
+ """
if self.__state_stack is None:
self.__state_stack = []
self.__state_stack.append(type(self))
self.begin(cls)
def pop_state(self):
- '''
+ """
Pop a lexer state from the stack
- '''
+ """
self.begin(self.__state_stack.pop())
def tokenize(self, text, lineno=1, index=0):
- _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None
+ _ignored_tokens = (
+ _master_re
+ ) = _ignore = _token_funcs = _literals = _remapping = None
def _set_state(cls):
nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
@@ -419,7 +455,7 @@ class Lexer(metaclass=LexerMeta):
# A lexing error
self.index = index
self.lineno = lineno
- tok.type = 'ERROR'
+ tok.type = "ERROR"
tok.value = text[index:]
tok = self.error(tok)
if tok is not None:
@@ -436,4 +472,8 @@ class Lexer(metaclass=LexerMeta):
# Default implementations of the error handler. May be changed in subclasses
def error(self, t):
- raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
+ raise LexError(
+ f"Illegal character {t.value[0]!r} at index {self.index}",
+ t.value,
+ self.index,
+ )
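To ground the lexer changes, a minimal sketch of the Lexer API defined in this
file, following the upstream SLY documentation (CalcLexer is a hypothetical
name):

from sly import Lexer

class CalcLexer(Lexer):
    # Token names must appear in `tokens`; the metadict above turns bare
    # uppercase names into pattern placeholders.
    tokens = { NAME, NUMBER }
    ignore = ' \t'
    literals = { '+', '*' }

    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)          # convert the lexeme to an int
        return t

    @_(r'\n+')
    def ignore_newline(self, t):
        # Ignored token; the function still runs, so line tracking works.
        self.lineno += t.value.count('\n')

for tok in CalcLexer().tokenize('x + 3 * y\n'):
    print(tok)
# Token(type='NAME', value='x', lineno=1, index=0), and so on.
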
diff --git a/lib/sly/yacc.py b/lib/sly/yacc.py
index c30f13c..167168d 100644
--- a/lib/sly/yacc.py
+++ b/lib/sly/yacc.py
@@ -35,22 +35,25 @@ import sys
import inspect
from collections import OrderedDict, defaultdict
-__all__ = [ 'Parser' ]
+__all__ = ["Parser"]
+
class YaccError(Exception):
- '''
+ """
Exception raised for yacc-related build errors.
- '''
+ """
+
pass
-#-----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
# === User configurable parameters ===
#
-# Change these to modify the default behavior of yacc (if you wish).
+# Change these to modify the default behavior of yacc (if you wish).
# Move these parameters to the Yacc class itself.
-#-----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
-ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode
+ERROR_COUNT = 3 # Number of symbols that must be shifted to leave recovery mode
MAXINT = sys.maxsize
# This object is a stand-in for a logging object created by the
@@ -59,20 +62,21 @@ MAXINT = sys.maxsize
# information, they can create their own logging object and pass
# it into SLY.
+
class SlyLogger(object):
def __init__(self, f):
self.f = f
def debug(self, msg, *args, **kwargs):
- self.f.write((msg % args) + '\n')
+ self.f.write((msg % args) + "\n")
info = debug
def warning(self, msg, *args, **kwargs):
- self.f.write('WARNING: ' + (msg % args) + '\n')
+ self.f.write("WARNING: " + (msg % args) + "\n")
def error(self, msg, *args, **kwargs):
- self.f.write('ERROR: ' + (msg % args) + '\n')
+ self.f.write("ERROR: " + (msg % args) + "\n")
critical = debug
@@ -86,6 +90,7 @@ class SlyLogger(object):
# .index = Starting lex position
# ----------------------------------------------------------------------
+
class YaccSymbol:
def __str__(self):
return self.type
@@ -93,19 +98,22 @@ class YaccSymbol:
def __repr__(self):
return str(self)
+
# ----------------------------------------------------------------------
# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
-# item (or 0 if not defined).
+# item (or 0 if not defined).
# ----------------------------------------------------------------------
+
class YaccProduction:
- __slots__ = ('_slice', '_namemap', '_stack')
+ __slots__ = ("_slice", "_namemap", "_stack")
+
def __init__(self, s, stack=None):
self._slice = s
- self._namemap = { }
+ self._namemap = {}
self._stack = stack
def __getitem__(self, n):
@@ -128,34 +136,35 @@ class YaccProduction:
for tok in self._slice:
if isinstance(tok, YaccSymbol):
continue
- lineno = getattr(tok, 'lineno', None)
+ lineno = getattr(tok, "lineno", None)
if lineno:
return lineno
- raise AttributeError('No line number found')
+ raise AttributeError("No line number found")
@property
def index(self):
for tok in self._slice:
if isinstance(tok, YaccSymbol):
continue
- index = getattr(tok, 'index', None)
+ index = getattr(tok, "index", None)
if index is not None:
return index
- raise AttributeError('No index attribute found')
+ raise AttributeError("No index attribute found")
def __getattr__(self, name):
if name in self._namemap:
return self._slice[self._namemap[name]].value
else:
- nameset = '{' + ', '.join(self._namemap) + '}'
- raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
+ nameset = "{" + ", ".join(self._namemap) + "}"
+ raise AttributeError(f"No symbol {name}. Must be one of {nameset}.")
def __setattr__(self, name, value):
- if name[:1] == '_':
+ if name[:1] == "_":
super().__setattr__(name, value)
else:
raise AttributeError(f"Can't reassign the value of attribute {name!r}")
+
# -----------------------------------------------------------------------------
# === Grammar Representation ===
#
@@ -187,19 +196,23 @@ class YaccProduction:
# usyms - Set of unique symbols found in the production
# -----------------------------------------------------------------------------
+
class Production(object):
reduced = 0
- def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
- self.name = name
- self.prod = tuple(prod)
- self.number = number
- self.func = func
- self.file = file
- self.line = line
- self.prec = precedence
-
+
+ def __init__(
+ self, number, name, prod, precedence=("right", 0), func=None, file="", line=0
+ ):
+ self.name = name
+ self.prod = tuple(prod)
+ self.number = number
+ self.func = func
+ self.file = file
+ self.line = line
+ self.prec = precedence
+
# Internal settings used during table construction
- self.len = len(self.prod) # Length of the production
+ self.len = len(self.prod) # Length of the production
# Create a list of unique production symbols used in the production
self.usyms = []
@@ -216,33 +229,33 @@ class Production(object):
m[key] = indices[0]
else:
for n, index in enumerate(indices):
- m[key+str(n)] = index
+ m[key + str(n)] = index
self.namemap = m
-
+
# List of all LR items for the production
self.lr_items = []
self.lr_next = None
def __str__(self):
if self.prod:
- s = '%s -> %s' % (self.name, ' '.join(self.prod))
+ s = "%s -> %s" % (self.name, " ".join(self.prod))
else:
- s = f'{self.name} -> <empty>'
+ s = f"{self.name} -> <empty>"
if self.prec[1]:
- s += ' [precedence=%s, level=%d]' % self.prec
+ s += " [precedence=%s, level=%d]" % self.prec
return s
def __repr__(self):
- return f'Production({self})'
+ return f"Production({self})"
def __len__(self):
return len(self.prod)
def __nonzero__(self):
- raise RuntimeError('Used')
+ raise RuntimeError("Used")
return 1
def __getitem__(self, index):
@@ -255,15 +268,16 @@ class Production(object):
p = LRItem(self, n)
# Precompute the list of productions immediately following.
try:
- p.lr_after = Prodnames[p.prod[n+1]]
+ p.lr_after = Prodnames[p.prod[n + 1]]
except (IndexError, KeyError):
p.lr_after = []
try:
- p.lr_before = p.prod[n-1]
+ p.lr_before = p.prod[n - 1]
except IndexError:
p.lr_before = None
return p
+
# -----------------------------------------------------------------------------
# class LRItem
#
@@ -288,27 +302,29 @@ class Production(object):
# lr_before - Grammar symbol immediately before
# -----------------------------------------------------------------------------
+
class LRItem(object):
def __init__(self, p, n):
- self.name = p.name
- self.prod = list(p.prod)
- self.number = p.number
- self.lr_index = n
+ self.name = p.name
+ self.prod = list(p.prod)
+ self.number = p.number
+ self.lr_index = n
self.lookaheads = {}
- self.prod.insert(n, '.')
- self.prod = tuple(self.prod)
- self.len = len(self.prod)
- self.usyms = p.usyms
+ self.prod.insert(n, ".")
+ self.prod = tuple(self.prod)
+ self.len = len(self.prod)
+ self.usyms = p.usyms
def __str__(self):
if self.prod:
- s = '%s -> %s' % (self.name, ' '.join(self.prod))
+ s = "%s -> %s" % (self.name, " ".join(self.prod))
else:
- s = f'{self.name} -> <empty>'
+ s = f"{self.name} -> <empty>"
return s
def __repr__(self):
- return f'LRItem({self})'
+ return f"LRItem({self})"
+
# -----------------------------------------------------------------------------
# rightmost_terminal()
@@ -323,6 +339,7 @@ def rightmost_terminal(symbols, terminals):
i -= 1
return None
+
# -----------------------------------------------------------------------------
# === GRAMMAR CLASS ===
#
@@ -331,45 +348,52 @@ def rightmost_terminal(symbols, terminals):
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------
+
class GrammarError(YaccError):
pass
+
class Grammar(object):
def __init__(self, terminals):
- self.Productions = [None] # A list of all of the productions. The first
- # entry is always reserved for the purpose of
- # building an augmented grammar
+ self.Productions = [None] # A list of all of the productions. The first
+ # entry is always reserved for the purpose of
+ # building an augmented grammar
- self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
- # productions of that nonterminal.
+ self.Prodnames = (
+ {}
+ ) # A dictionary mapping the names of nonterminals to a list of all
+ # productions of that nonterminal.
- self.Prodmap = {} # A dictionary that is only used to detect duplicate
- # productions.
+ self.Prodmap = {} # A dictionary that is only used to detect duplicate
+ # productions.
- self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
- # list of the rules where they are used.
+ self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
+ # list of the rules where they are used.
for term in terminals:
self.Terminals[term] = []
- self.Terminals['error'] = []
-
- self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
- # of rule numbers where they are used.
+ self.Terminals["error"] = []
- self.First = {} # A dictionary of precomputed FIRST(x) symbols
+ self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
+ # of rule numbers where they are used.
- self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
+ self.First = {} # A dictionary of precomputed FIRST(x) symbols
- self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
- # form ('right',level) or ('nonassoc', level) or ('left',level)
+ self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
- self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
- # This is only used to provide error checking and to generate
- # a warning about unused precedence rules.
+ self.Precedence = (
+ {}
+ ) # Precedence rules for each terminal. Contains tuples of the
+ # form ('right',level) or ('nonassoc', level) or ('left',level)
- self.Start = None # Starting symbol for the grammar
+ self.UsedPrecedence = (
+ set()
+ ) # Precedence rules that were actually used by the grammer.
+ # This is only used to provide error checking and to generate
+ # a warning about unused precedence rules.
+ self.Start = None # Starting symbol for the grammar
def __len__(self):
return len(self.Productions)
@@ -386,11 +410,15 @@ class Grammar(object):
# -----------------------------------------------------------------------------
def set_precedence(self, term, assoc, level):
- assert self.Productions == [None], 'Must call set_precedence() before add_production()'
+ assert self.Productions == [
+ None
+ ], "Must call set_precedence() before add_production()"
if term in self.Precedence:
- raise GrammarError(f'Precedence already specified for terminal {term!r}')
- if assoc not in ['left', 'right', 'nonassoc']:
- raise GrammarError(f"Associativity of {term!r} must be one of 'left','right', or 'nonassoc'")
+ raise GrammarError(f"Precedence already specified for terminal {term!r}")
+ if assoc not in ["left", "right", "nonassoc"]:
+ raise GrammarError(
+ f"Associativity of {term!r} must be one of 'left','right', or 'nonassoc'"
+ )
self.Precedence[term] = (assoc, level)
# -----------------------------------------------------------------------------
@@ -410,51 +438,65 @@ class Grammar(object):
# are valid and that %prec is used correctly.
# -----------------------------------------------------------------------------
- def add_production(self, prodname, syms, func=None, file='', line=0):
+ def add_production(self, prodname, syms, func=None, file="", line=0):
if prodname in self.Terminals:
- raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token')
- if prodname == 'error':
- raise GrammarError(f'{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word')
+ raise GrammarError(
+ f"{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token"
+ )
+ if prodname == "error":
+ raise GrammarError(
+ f"{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word"
+ )
# Look for literal tokens
for n, s in enumerate(syms):
if s[0] in "'\"" and s[0] == s[-1]:
c = s[1:-1]
- if (len(c) != 1):
- raise GrammarError(f'{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character')
+ if len(c) != 1:
+ raise GrammarError(
+ f"{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character"
+ )
if c not in self.Terminals:
self.Terminals[c] = []
syms[n] = c
continue
# Determine the precedence level
- if '%prec' in syms:
- if syms[-1] == '%prec':
- raise GrammarError(f'{file}:{line}: Syntax error. Nothing follows %%prec')
- if syms[-2] != '%prec':
- raise GrammarError(f'{file}:{line}: Syntax error. %prec can only appear at the end of a grammar rule')
+ if "%prec" in syms:
+ if syms[-1] == "%prec":
+ raise GrammarError(
+ f"{file}:{line}: Syntax error. Nothing follows %%prec"
+ )
+ if syms[-2] != "%prec":
+ raise GrammarError(
+ f"{file}:{line}: Syntax error. %prec can only appear at the end of a grammar rule"
+ )
precname = syms[-1]
prodprec = self.Precedence.get(precname)
if not prodprec:
- raise GrammarError(f'{file}:{line}: Nothing known about the precedence of {precname!r}')
+ raise GrammarError(
+ f"{file}:{line}: Nothing known about the precedence of {precname!r}"
+ )
else:
self.UsedPrecedence.add(precname)
- del syms[-2:] # Drop %prec from the rule
+ del syms[-2:] # Drop %prec from the rule
else:
# If no %prec, precedence is determined by the rightmost terminal symbol
precname = rightmost_terminal(syms, self.Terminals)
- prodprec = self.Precedence.get(precname, ('right', 0))
+ prodprec = self.Precedence.get(precname, ("right", 0))
# See if the rule is already in the rulemap
- map = '%s -> %s' % (prodname, syms)
+ map = "%s -> %s" % (prodname, syms)
if map in self.Prodmap:
m = self.Prodmap[map]
- raise GrammarError(f'{file}:{line}: Duplicate rule {m}. ' +
- f'Previous definition at {m.file}:{m.line}')
+ raise GrammarError(
+ f"{file}:{line}: Duplicate rule {m}. "
+ + f"Previous definition at {m.file}:{m.line}"
+ )
# From this point on, everything is valid. Create a new Production instance
- pnumber = len(self.Productions)
+ pnumber = len(self.Productions)
if prodname not in self.Nonterminals:
self.Nonterminals[prodname] = []
@@ -493,7 +535,7 @@ class Grammar(object):
start = self.Productions[1].name
if start not in self.Nonterminals:
- raise GrammarError(f'start symbol {start} undefined')
+ raise GrammarError(f"start symbol {start} undefined")
self.Productions[0] = Production(0, "S'", [start])
self.Nonterminals[start].append(0)
self.Start = start
@@ -535,7 +577,7 @@ class Grammar(object):
for t in self.Terminals:
terminates[t] = True
- terminates['$end'] = True
+ terminates["$end"] = True
# Nonterminals:
@@ -576,7 +618,7 @@ class Grammar(object):
infinite = []
for (s, term) in terminates.items():
if not term:
- if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+ if s not in self.Prodnames and s not in self.Terminals and s != "error":
# s is used-but-not-defined, and we've already warned of that,
# so it would be overkill to say that it's also non-terminating.
pass
@@ -599,7 +641,7 @@ class Grammar(object):
continue
for s in p.prod:
- if s not in self.Prodnames and s not in self.Terminals and s != 'error':
+ if s not in self.Prodnames and s not in self.Terminals and s != "error":
result.append((s, p))
return result
@@ -612,7 +654,7 @@ class Grammar(object):
def unused_terminals(self):
unused_tok = []
for s, v in self.Terminals.items():
- if s != 'error' and not v:
+ if s != "error" and not v:
unused_tok.append(s)
return unused_tok
@@ -666,7 +708,7 @@ class Grammar(object):
# Add all the non-<empty> symbols of First[x] to the result.
for f in self.First[x]:
- if f == '<empty>':
+ if f == "<empty>":
x_produces_empty = True
else:
if f not in result:
@@ -683,7 +725,7 @@ class Grammar(object):
# There was no 'break' from the loop,
# so x_produces_empty was true for all x in beta,
# so beta produces empty as well.
- result.append('<empty>')
+ result.append("<empty>")
return result
@@ -700,7 +742,7 @@ class Grammar(object):
for t in self.Terminals:
self.First[t] = [t]
- self.First['$end'] = ['$end']
+ self.First["$end"] = ["$end"]
# Nonterminals:
@@ -745,7 +787,7 @@ class Grammar(object):
if not start:
start = self.Productions[1].name
- self.Follow[start] = ['$end']
+ self.Follow[start] = ["$end"]
while True:
didadd = False
@@ -754,15 +796,15 @@ class Grammar(object):
for i, B in enumerate(p.prod):
if B in self.Nonterminals:
# Okay. We got a non-terminal in a production
- fst = self._first(p.prod[i+1:])
+ fst = self._first(p.prod[i + 1 :])
hasempty = False
for f in fst:
- if f != '<empty>' and f not in self.Follow[B]:
+ if f != "<empty>" and f not in self.Follow[B]:
self.Follow[B].append(f)
didadd = True
- if f == '<empty>':
+ if f == "<empty>":
hasempty = True
- if hasempty or i == (len(p.prod)-1):
+ if hasempty or i == (len(p.prod) - 1):
# Add elements of follow(a) to follow(b)
for f in self.Follow[p.name]:
if f not in self.Follow[B]:
@@ -772,7 +814,6 @@ class Grammar(object):
break
return self.Follow
-
# -----------------------------------------------------------------------------
# build_lritems()
#
@@ -800,11 +841,11 @@ class Grammar(object):
lri = LRItem(p, i)
# Precompute the list of productions immediately following
try:
- lri.lr_after = self.Prodnames[lri.prod[i+1]]
+ lri.lr_after = self.Prodnames[lri.prod[i + 1]]
except (IndexError, KeyError):
lri.lr_after = []
try:
- lri.lr_before = lri.prod[i-1]
+ lri.lr_before = lri.prod[i - 1]
except IndexError:
lri.lr_before = None
@@ -816,33 +857,38 @@ class Grammar(object):
i += 1
p.lr_items = lr_items
-
# ----------------------------------------------------------------------
# Debugging output. Printing the grammar will produce a detailed
# description along with some diagnostics.
# ----------------------------------------------------------------------
def __str__(self):
out = []
- out.append('Grammar:\n')
+ out.append("Grammar:\n")
for n, p in enumerate(self.Productions):
- out.append(f'Rule {n:<5d} {p}')
-
+ out.append(f"Rule {n:<5d} {p}")
+
unused_terminals = self.unused_terminals()
if unused_terminals:
- out.append('\nUnused terminals:\n')
+ out.append("\nUnused terminals:\n")
for term in unused_terminals:
- out.append(f' {term}')
+ out.append(f" {term}")
- out.append('\nTerminals, with rules where they appear:\n')
+ out.append("\nTerminals, with rules where they appear:\n")
for term in sorted(self.Terminals):
- out.append('%-20s : %s' % (term, ' '.join(str(s) for s in self.Terminals[term])))
+ out.append(
+ "%-20s : %s" % (term, " ".join(str(s) for s in self.Terminals[term]))
+ )
- out.append('\nNonterminals, with rules where they appear:\n')
+ out.append("\nNonterminals, with rules where they appear:\n")
for nonterm in sorted(self.Nonterminals):
- out.append('%-20s : %s' % (nonterm, ' '.join(str(s) for s in self.Nonterminals[nonterm])))
+ out.append(
+ "%-20s : %s"
+ % (nonterm, " ".join(str(s) for s in self.Nonterminals[nonterm]))
+ )
+
+ out.append("")
+ return "\n".join(out)
- out.append('')
- return '\n'.join(out)
# -----------------------------------------------------------------------------
# === LR Generator ===
@@ -868,6 +914,7 @@ class Grammar(object):
# FP - Set-valued function
# ------------------------------------------------------------------------------
+
def digraph(X, R, FP):
N = {}
for x in X:
@@ -879,13 +926,14 @@ def digraph(X, R, FP):
traverse(x, N, stack, F, X, R, FP)
return F
+
def traverse(x, N, stack, F, X, R, FP):
stack.append(x)
d = len(stack)
N[x] = d
- F[x] = FP(x) # F(X) <- F'(x)
+ F[x] = FP(x) # F(X) <- F'(x)
- rel = R(x) # Get y's related to x
+ rel = R(x) # Get y's related to x
for y in rel:
if N[y] == 0:
traverse(y, N, stack, F, X, R, FP)
@@ -902,9 +950,11 @@ def traverse(x, N, stack, F, X, R, FP):
F[stack[-1]] = F[x]
element = stack.pop()
+
class LALRError(YaccError):
pass
+
# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
@@ -912,26 +962,27 @@ class LALRError(YaccError):
# public methods except for write()
# -----------------------------------------------------------------------------
+
class LRTable(object):
def __init__(self, grammar):
self.grammar = grammar
# Internal attributes
- self.lr_action = {} # Action table
- self.lr_goto = {} # Goto table
- self.lr_productions = grammar.Productions # Copy of grammar Production array
- self.lr_goto_cache = {} # Cache of computed gotos
- self.lr0_cidhash = {} # Cache of closures
- self._add_count = 0 # Internal counter used to detect cycles
+ self.lr_action = {} # Action table
+ self.lr_goto = {} # Goto table
+ self.lr_productions = grammar.Productions # Copy of grammar Production array
+ self.lr_goto_cache = {} # Cache of computed gotos
+ self.lr0_cidhash = {} # Cache of closures
+ self._add_count = 0 # Internal counter used to detect cycles
# Diagonistic information filled in by the table generator
self.state_descriptions = OrderedDict()
- self.sr_conflict = 0
- self.rr_conflict = 0
- self.conflicts = [] # List of conflicts
+ self.sr_conflict = 0
+ self.rr_conflict = 0
+ self.conflicts = [] # List of conflicts
- self.sr_conflicts = []
- self.rr_conflicts = []
+ self.sr_conflicts = []
+ self.rr_conflicts = []
# Build the tables
self.grammar.build_lritems()
@@ -964,7 +1015,7 @@ class LRTable(object):
didadd = False
for j in J:
for x in j.lr_after:
- if getattr(x, 'lr0_added', 0) == self._add_count:
+ if getattr(x, "lr0_added", 0) == self._add_count:
continue
# Add B --> .G to J
J.append(x.lr_next)
@@ -1004,13 +1055,13 @@ class LRTable(object):
s[id(n)] = s1
gs.append(n)
s = s1
- g = s.get('$end')
+ g = s.get("$end")
if not g:
if gs:
g = self.lr0_closure(gs)
- s['$end'] = g
+ s["$end"] = g
else:
- s['$end'] = gs
+ s["$end"] = gs
self.lr_goto_cache[(id(I), x)] = g
return g
@@ -1105,7 +1156,7 @@ class LRTable(object):
for stateno, state in enumerate(C):
for p in state:
if p.lr_index < p.len - 1:
- t = (stateno, p.prod[p.lr_index+1])
+ t = (stateno, p.prod[p.lr_index + 1])
if t[1] in self.grammar.Nonterminals:
if t not in trans:
trans.append(t)
@@ -1128,14 +1179,14 @@ class LRTable(object):
g = self.lr0_goto(C[state], N)
for p in g:
if p.lr_index < p.len - 1:
- a = p.prod[p.lr_index+1]
+ a = p.prod[p.lr_index + 1]
if a in self.grammar.Terminals:
if a not in terms:
terms.append(a)
# This extra bit is to handle the start state
if state == 0 and N == self.grammar.Productions[0].prod[0]:
- terms.append('$end')
+ terms.append("$end")
return terms
@@ -1189,8 +1240,8 @@ class LRTable(object):
# -----------------------------------------------------------------------------
def compute_lookback_includes(self, C, trans, nullable):
- lookdict = {} # Dictionary of lookback relations
- includedict = {} # Dictionary of include relations
+ lookdict = {} # Dictionary of lookback relations
+ includedict = {} # Dictionary of include relations
# Make a dictionary of non-terminal transitions
dtrans = {}
@@ -1223,7 +1274,7 @@ class LRTable(object):
li = lr_index + 1
while li < p.len:
if p.prod[li] in self.grammar.Terminals:
- break # No forget it
+ break # No forget it
if p.prod[li] not in nullable:
break
li = li + 1
@@ -1231,8 +1282,8 @@ class LRTable(object):
# Appears to be a relation between (j,t) and (state,N)
includes.append((j, t))
- g = self.lr0_goto(C[j], t) # Go to next set
- j = self.lr0_cidhash.get(id(g), -1) # Go to next state
+ g = self.lr0_goto(C[j], t) # Go to next set
+ j = self.lr0_cidhash.get(id(g), -1) # Go to next state
# When we get here, j is the final state, now we have to locate the production
for r in C[j]:
@@ -1243,7 +1294,7 @@ class LRTable(object):
i = 0
# This look is comparing a production ". A B C" with "A B C ."
while i < r.lr_index:
- if r.prod[i] != p.prod[i+1]:
+ if r.prod[i] != p.prod[i + 1]:
break
i = i + 1
else:
@@ -1270,7 +1321,7 @@ class LRTable(object):
def compute_read_sets(self, C, ntrans, nullable):
FP = lambda x: self.dr_relation(C, x, nullable)
- R = lambda x: self.reads_relation(C, x, nullable)
+ R = lambda x: self.reads_relation(C, x, nullable)
F = digraph(ntrans, R, FP)
return F
@@ -1292,7 +1343,7 @@ class LRTable(object):
def compute_follow_sets(self, ntrans, readsets, inclsets):
FP = lambda x: readsets[x]
- R = lambda x: inclsets.get(x, [])
+ R = lambda x: inclsets.get(x, [])
F = digraph(ntrans, R, FP)
return F
@@ -1352,11 +1403,11 @@ class LRTable(object):
# -----------------------------------------------------------------------------
def lr_parse_table(self):
Productions = self.grammar.Productions
- Precedence = self.grammar.Precedence
- goto = self.lr_goto # Goto array
- action = self.lr_action # Action array
+ Precedence = self.grammar.Precedence
+ goto = self.lr_goto # Goto array
+ action = self.lr_action # Action array
- actionp = {} # Action production array (temporary)
+ actionp = {} # Action production array (temporary)
# Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
# This determines the number of states
@@ -1368,129 +1419,149 @@ class LRTable(object):
for st, I in enumerate(C):
descrip = []
# Loop over each production in I
- actlist = [] # List of actions
- st_action = {}
+ actlist = [] # List of actions
+ st_action = {}
st_actionp = {}
- st_goto = {}
+ st_goto = {}
- descrip.append(f'\nstate {st}\n')
+ descrip.append(f"\nstate {st}\n")
for p in I:
- descrip.append(f' ({p.number}) {p}')
+ descrip.append(f" ({p.number}) {p}")
for p in I:
- if p.len == p.lr_index + 1:
- if p.name == "S'":
- # Start symbol. Accept!
- st_action['$end'] = 0
- st_actionp['$end'] = p
- else:
- # We are at the end of a production. Reduce!
- laheads = p.lookaheads[st]
- for a in laheads:
- actlist.append((a, p, f'reduce using rule {p.number} ({p})'))
- r = st_action.get(a)
- if r is not None:
- # Have a shift/reduce or reduce/reduce conflict
- if r > 0:
- # Need to decide on shift or reduce here
- # By default we favor shifting. Need to add
- # some precedence rules here.
-
- # Shift precedence comes from the token
- sprec, slevel = Precedence.get(a, ('right', 0))
-
- # Reduce precedence comes from rule being reduced (p)
- rprec, rlevel = Productions[p.number].prec
-
- if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
- # We really need to reduce here.
- st_action[a] = -p.number
- st_actionp[a] = p
- if not slevel and not rlevel:
- descrip.append(f' ! shift/reduce conflict for {a} resolved as reduce')
- self.sr_conflicts.append((st, a, 'reduce'))
- Productions[p.number].reduced += 1
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- st_action[a] = None
- else:
- # Hmmm. Guess we'll keep the shift
- if not rlevel:
- descrip.append(f' ! shift/reduce conflict for {a} resolved as shift')
- self.sr_conflicts.append((st, a, 'shift'))
- elif r <= 0:
- # Reduce/reduce conflict. In this case, we favor the rule
- # that was defined first in the grammar file
- oldp = Productions[-r]
- pp = Productions[p.number]
- if oldp.line > pp.line:
- st_action[a] = -p.number
- st_actionp[a] = p
- chosenp, rejectp = pp, oldp
- Productions[p.number].reduced += 1
- Productions[oldp.number].reduced -= 1
- else:
- chosenp, rejectp = oldp, pp
- self.rr_conflicts.append((st, chosenp, rejectp))
- descrip.append(' ! reduce/reduce conflict for %s resolved using rule %d (%s)' %
- (a, st_actionp[a].number, st_actionp[a]))
+ if p.len == p.lr_index + 1:
+ if p.name == "S'":
+ # Start symbol. Accept!
+ st_action["$end"] = 0
+ st_actionp["$end"] = p
+ else:
+ # We are at the end of a production. Reduce!
+ laheads = p.lookaheads[st]
+ for a in laheads:
+ actlist.append(
+ (a, p, f"reduce using rule {p.number} ({p})")
+ )
+ r = st_action.get(a)
+ if r is not None:
+ # Have a shift/reduce or reduce/reduce conflict
+ if r > 0:
+ # Need to decide on shift or reduce here
+ # By default we favor shifting. Need to add
+ # some precedence rules here.
+
+ # Shift precedence comes from the token
+ sprec, slevel = Precedence.get(a, ("right", 0))
+
+ # Reduce precedence comes from rule being reduced (p)
+ rprec, rlevel = Productions[p.number].prec
+
+ if (slevel < rlevel) or (
+ (slevel == rlevel) and (rprec == "left")
+ ):
+ # We really need to reduce here.
+ st_action[a] = -p.number
+ st_actionp[a] = p
+ if not slevel and not rlevel:
+ descrip.append(
+ f" ! shift/reduce conflict for {a} resolved as reduce"
+ )
+ self.sr_conflicts.append((st, a, "reduce"))
+ Productions[p.number].reduced += 1
+ elif (slevel == rlevel) and (rprec == "nonassoc"):
+ st_action[a] = None
+ else:
+ # Hmmm. Guess we'll keep the shift
+ if not rlevel:
+ descrip.append(
+ f" ! shift/reduce conflict for {a} resolved as shift"
+ )
+ self.sr_conflicts.append((st, a, "shift"))
+ elif r <= 0:
+ # Reduce/reduce conflict. In this case, we favor the rule
+ # that was defined first in the grammar file
+ oldp = Productions[-r]
+ pp = Productions[p.number]
+ if oldp.line > pp.line:
+ st_action[a] = -p.number
+ st_actionp[a] = p
+ chosenp, rejectp = pp, oldp
+ Productions[p.number].reduced += 1
+ Productions[oldp.number].reduced -= 1
else:
- raise LALRError(f'Unknown conflict in state {st}')
+ chosenp, rejectp = oldp, pp
+ self.rr_conflicts.append((st, chosenp, rejectp))
+ descrip.append(
+ " ! reduce/reduce conflict for %s resolved using rule %d (%s)"
+ % (a, st_actionp[a].number, st_actionp[a])
+ )
else:
- st_action[a] = -p.number
- st_actionp[a] = p
- Productions[p.number].reduced += 1
- else:
- i = p.lr_index
- a = p.prod[i+1] # Get symbol right after the "."
- if a in self.grammar.Terminals:
- g = self.lr0_goto(I, a)
- j = self.lr0_cidhash.get(id(g), -1)
- if j >= 0:
- # We are in a shift state
- actlist.append((a, p, f'shift and go to state {j}'))
- r = st_action.get(a)
- if r is not None:
- # Whoa have a shift/reduce or shift/shift conflict
- if r > 0:
- if r != j:
- raise LALRError(f'Shift/shift conflict in state {st}')
- elif r <= 0:
- # Do a precedence check.
- # - if precedence of reduce rule is higher, we reduce.
- # - if precedence of reduce is same and left assoc, we reduce.
- # - otherwise we shift
- rprec, rlevel = Productions[st_actionp[a].number].prec
- sprec, slevel = Precedence.get(a, ('right', 0))
- if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
- # We decide to shift here... highest precedence to shift
- Productions[st_actionp[a].number].reduced -= 1
- st_action[a] = j
- st_actionp[a] = p
- if not rlevel:
- descrip.append(f' ! shift/reduce conflict for {a} resolved as shift')
- self.sr_conflicts.append((st, a, 'shift'))
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- st_action[a] = None
- else:
- # Hmmm. Guess we'll keep the reduce
- if not slevel and not rlevel:
- descrip.append(f' ! shift/reduce conflict for {a} resolved as reduce')
- self.sr_conflicts.append((st, a, 'reduce'))
-
+ raise LALRError(f"Unknown conflict in state {st}")
+ else:
+ st_action[a] = -p.number
+ st_actionp[a] = p
+ Productions[p.number].reduced += 1
+ else:
+ i = p.lr_index
+ a = p.prod[i + 1] # Get symbol right after the "."
+ if a in self.grammar.Terminals:
+ g = self.lr0_goto(I, a)
+ j = self.lr0_cidhash.get(id(g), -1)
+ if j >= 0:
+ # We are in a shift state
+ actlist.append((a, p, f"shift and go to state {j}"))
+ r = st_action.get(a)
+ if r is not None:
+ # Whoa have a shift/reduce or shift/shift conflict
+ if r > 0:
+ if r != j:
+ raise LALRError(
+ f"Shift/shift conflict in state {st}"
+ )
+ elif r <= 0:
+ # Do a precedence check.
+ # - if precedence of reduce rule is higher, we reduce.
+ # - if precedence of reduce is same and left assoc, we reduce.
+ # - otherwise we shift
+ rprec, rlevel = Productions[
+ st_actionp[a].number
+ ].prec
+ sprec, slevel = Precedence.get(a, ("right", 0))
+ if (slevel > rlevel) or (
+ (slevel == rlevel) and (rprec == "right")
+ ):
+ # We decide to shift here... highest precedence to shift
+ Productions[st_actionp[a].number].reduced -= 1
+ st_action[a] = j
+ st_actionp[a] = p
+ if not rlevel:
+ descrip.append(
+ f" ! shift/reduce conflict for {a} resolved as shift"
+ )
+ self.sr_conflicts.append((st, a, "shift"))
+ elif (slevel == rlevel) and (rprec == "nonassoc"):
+ st_action[a] = None
else:
- raise LALRError(f'Unknown conflict in state {st}')
+ # Hmmm. Guess we'll keep the reduce
+ if not slevel and not rlevel:
+ descrip.append(
+ f" ! shift/reduce conflict for {a} resolved as reduce"
+ )
+ self.sr_conflicts.append((st, a, "reduce"))
+
else:
- st_action[a] = j
- st_actionp[a] = p
+ raise LALRError(f"Unknown conflict in state {st}")
+ else:
+ st_action[a] = j
+ st_actionp[a] = p
# Print the actions associated with each terminal
_actprint = {}
for a, p, m in actlist:
if a in st_action:
if p is st_actionp[a]:
- descrip.append(f' {a:<15s} {m}')
+ descrip.append(f" {a:<15s} {m}")
_actprint[(a, m)] = 1
- descrip.append('')
+ descrip.append("")
# Construct the goto table for this state
nkeys = {}
@@ -1503,12 +1574,12 @@ class LRTable(object):
j = self.lr0_cidhash.get(id(g), -1)
if j >= 0:
st_goto[n] = j
- descrip.append(f' {n:<30s} shift and go to state {j}')
+ descrip.append(f" {n:<30s} shift and go to state {j}")
action[st] = st_action
actionp[st] = st_actionp
goto[st] = st_goto
- self.state_descriptions[st] = '\n'.join(descrip)
+ self.state_descriptions[st] = "\n".join(descrip)
# ----------------------------------------------------------------------
# Debugging output. Printing the LRTable object will produce a listing
@@ -1518,28 +1589,33 @@ class LRTable(object):
out = []
for descrip in self.state_descriptions.values():
out.append(descrip)
-
+
if self.sr_conflicts or self.rr_conflicts:
- out.append('\nConflicts:\n')
+ out.append("\nConflicts:\n")
for state, tok, resolution in self.sr_conflicts:
- out.append(f'shift/reduce conflict for {tok} in state {state} resolved as {resolution}')
+ out.append(
+ f"shift/reduce conflict for {tok} in state {state} resolved as {resolution}"
+ )
already_reported = set()
for state, rule, rejected in self.rr_conflicts:
if (state, id(rule), id(rejected)) in already_reported:
continue
- out.append(f'reduce/reduce conflict in state {state} resolved using rule {rule}')
- out.append(f'rejected rule ({rejected}) in state {state}')
+ out.append(
+ f"reduce/reduce conflict in state {state} resolved using rule {rule}"
+ )
+ out.append(f"rejected rule ({rejected}) in state {state}")
already_reported.add((state, id(rule), id(rejected)))
warned_never = set()
for state, rule, rejected in self.rr_conflicts:
if not rejected.reduced and (rejected not in warned_never):
- out.append(f'Rule ({rejected}) is never reduced')
+ out.append(f"Rule ({rejected}) is never reduced")
warned_never.add(rejected)
- return '\n'.join(out)
+ return "\n".join(out)
+
# Collect grammar rules from a function
def _collect_grammar_rules(func):
@@ -1549,70 +1625,80 @@ def _collect_grammar_rules(func):
unwrapped = inspect.unwrap(func)
filename = unwrapped.__code__.co_filename
lineno = unwrapped.__code__.co_firstlineno
- for rule, lineno in zip(func.rules, range(lineno+len(func.rules)-1, 0, -1)):
+ for rule, lineno in zip(func.rules, range(lineno + len(func.rules) - 1, 0, -1)):
syms = rule.split()
- if syms[1:2] == [':'] or syms[1:2] == ['::=']:
+ if syms[1:2] == [":"] or syms[1:2] == ["::="]:
grammar.append((func, filename, lineno, syms[0], syms[2:]))
else:
grammar.append((func, filename, lineno, prodname, syms))
- func = getattr(func, 'next_func', None)
+ func = getattr(func, "next_func", None)
return grammar
+
class ParserMetaDict(dict):
- '''
+ """
Dictionary that allows decorated grammar rule functions to be overloaded
- '''
+ """
+
def __setitem__(self, key, value):
- if key in self and callable(value) and hasattr(value, 'rules'):
+ if key in self and callable(value) and hasattr(value, "rules"):
value.next_func = self[key]
- if not hasattr(value.next_func, 'rules'):
- raise GrammarError(f'Redefinition of {key}. Perhaps an earlier {key} is missing @_')
+ if not hasattr(value.next_func, "rules"):
+ raise GrammarError(
+ f"Redefinition of {key}. Perhaps an earlier {key} is missing @_"
+ )
super().__setitem__(key, value)
-
+
def __getitem__(self, key):
- if key not in self and key.isupper() and key[:1] != '_':
+ if key not in self and key.isupper() and key[:1] != "_":
return key.upper()
else:
return super().__getitem__(key)
+
class ParserMeta(type):
@classmethod
def __prepare__(meta, *args, **kwargs):
d = ParserMetaDict()
+
def _(rule, *extra):
rules = [rule, *extra]
+
def decorate(func):
- func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+ func.rules = [*getattr(func, "rules", []), *rules[::-1]]
return func
+
return decorate
- d['_'] = _
+
+ d["_"] = _
return d
def __new__(meta, clsname, bases, attributes):
- del attributes['_']
+ del attributes["_"]
cls = super().__new__(meta, clsname, bases, attributes)
cls._build(list(attributes.items()))
return cls
+
class Parser(metaclass=ParserMeta):
# Logging object where debugging/diagnostic messages are sent
- log = SlyLogger(sys.stderr)
+ log = SlyLogger(sys.stderr)
# Debugging filename where parsetab.out data can be written
debugfile = None
@classmethod
def __validate_tokens(cls):
- if not hasattr(cls, 'tokens'):
- cls.log.error('No token list is defined')
+ if not hasattr(cls, "tokens"):
+ cls.log.error("No token list is defined")
return False
if not cls.tokens:
- cls.log.error('tokens is empty')
+ cls.log.error("tokens is empty")
return False
- if 'error' in cls.tokens:
+ if "error" in cls.tokens:
cls.log.error("Illegal token name 'error'. Is a reserved word")
return False
@@ -1620,28 +1706,32 @@ class Parser(metaclass=ParserMeta):
@classmethod
def __validate_precedence(cls):
- if not hasattr(cls, 'precedence'):
+ if not hasattr(cls, "precedence"):
cls.__preclist = []
return True
preclist = []
if not isinstance(cls.precedence, (list, tuple)):
- cls.log.error('precedence must be a list or tuple')
+ cls.log.error("precedence must be a list or tuple")
return False
for level, p in enumerate(cls.precedence, start=1):
if not isinstance(p, (list, tuple)):
- cls.log.error(f'Bad precedence table entry {p!r}. Must be a list or tuple')
+ cls.log.error(
+ f"Bad precedence table entry {p!r}. Must be a list or tuple"
+ )
return False
if len(p) < 2:
- cls.log.error(f'Malformed precedence entry {p!r}. Must be (assoc, term, ..., term)')
+ cls.log.error(
+ f"Malformed precedence entry {p!r}. Must be (assoc, term, ..., term)"
+ )
return False
if not all(isinstance(term, str) for term in p):
- cls.log.error('precedence items must be strings')
+ cls.log.error("precedence items must be strings")
return False
-
+
assoc = p[0]
preclist.extend((term, assoc, level) for term in p[1:])
@@ -1650,9 +1740,9 @@ class Parser(metaclass=ParserMeta):
@classmethod
def __validate_specification(cls):
- '''
+ """
Validate various parts of the grammar specification
- '''
+ """
if not cls.__validate_tokens():
return False
if not cls.__validate_precedence():
@@ -1661,14 +1751,14 @@ class Parser(metaclass=ParserMeta):
@classmethod
def __build_grammar(cls, rules):
- '''
+ """
Build the grammar from the grammar rules
- '''
+ """
grammar_rules = []
- errors = ''
+ errors = ""
# Check for non-empty symbols
if not rules:
- raise YaccError('No grammar rules are defined')
+ raise YaccError("No grammar rules are defined")
grammar = Grammar(cls.tokens)
@@ -1677,95 +1767,110 @@ class Parser(metaclass=ParserMeta):
try:
grammar.set_precedence(term, assoc, level)
except GrammarError as e:
- errors += f'{e}\n'
+ errors += f"{e}\n"
for name, func in rules:
try:
parsed_rule = _collect_grammar_rules(func)
for pfunc, rulefile, ruleline, prodname, syms in parsed_rule:
try:
- grammar.add_production(prodname, syms, pfunc, rulefile, ruleline)
+ grammar.add_production(
+ prodname, syms, pfunc, rulefile, ruleline
+ )
except GrammarError as e:
- errors += f'{e}\n'
+ errors += f"{e}\n"
except SyntaxError as e:
- errors += f'{e}\n'
+ errors += f"{e}\n"
try:
- grammar.set_start(getattr(cls, 'start', None))
+ grammar.set_start(getattr(cls, "start", None))
except GrammarError as e:
- errors += f'{e}\n'
+ errors += f"{e}\n"
undefined_symbols = grammar.undefined_symbols()
for sym, prod in undefined_symbols:
- errors += '%s:%d: Symbol %r used, but not defined as a token or a rule\n' % (prod.file, prod.line, sym)
+ errors += (
+ "%s:%d: Symbol %r used, but not defined as a token or a rule\n"
+ % (prod.file, prod.line, sym)
+ )
unused_terminals = grammar.unused_terminals()
if unused_terminals:
- unused_str = '{' + ','.join(unused_terminals) + '}'
- cls.log.warning(f'Token{"(s)" if len(unused_terminals) >1 else ""} {unused_str} defined, but not used')
+ unused_str = "{" + ",".join(unused_terminals) + "}"
+ cls.log.warning(
+ f'Token{"(s)" if len(unused_terminals) >1 else ""} {unused_str} defined, but not used'
+ )
unused_rules = grammar.unused_rules()
for prod in unused_rules:
- cls.log.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
+ cls.log.warning(
+ "%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name
+ )
if len(unused_terminals) == 1:
- cls.log.warning('There is 1 unused token')
+ cls.log.warning("There is 1 unused token")
if len(unused_terminals) > 1:
- cls.log.warning('There are %d unused tokens', len(unused_terminals))
+ cls.log.warning("There are %d unused tokens", len(unused_terminals))
if len(unused_rules) == 1:
- cls.log.warning('There is 1 unused rule')
+ cls.log.warning("There is 1 unused rule")
if len(unused_rules) > 1:
- cls.log.warning('There are %d unused rules', len(unused_rules))
+ cls.log.warning("There are %d unused rules", len(unused_rules))
unreachable = grammar.find_unreachable()
for u in unreachable:
- cls.log.warning('Symbol %r is unreachable', u)
+ cls.log.warning("Symbol %r is unreachable", u)
if len(undefined_symbols) == 0:
infinite = grammar.infinite_cycles()
for inf in infinite:
- errors += 'Infinite recursion detected for symbol %r\n' % inf
+ errors += "Infinite recursion detected for symbol %r\n" % inf
unused_prec = grammar.unused_precedence()
for term, assoc in unused_prec:
- errors += 'Precedence rule %r defined for unknown symbol %r\n' % (assoc, term)
+ errors += "Precedence rule %r defined for unknown symbol %r\n" % (
+ assoc,
+ term,
+ )
cls._grammar = grammar
if errors:
- raise YaccError('Unable to build grammar.\n'+errors)
+ raise YaccError("Unable to build grammar.\n" + errors)
@classmethod
def __build_lrtables(cls):
- '''
+ """
Build the LR Parsing tables from the grammar
- '''
+ """
lrtable = LRTable(cls._grammar)
num_sr = len(lrtable.sr_conflicts)
# Report shift/reduce and reduce/reduce conflicts
- if num_sr != getattr(cls, 'expected_shift_reduce', None):
+ if num_sr != getattr(cls, "expected_shift_reduce", None):
if num_sr == 1:
- cls.log.warning('1 shift/reduce conflict')
+ cls.log.warning("1 shift/reduce conflict")
elif num_sr > 1:
- cls.log.warning('%d shift/reduce conflicts', num_sr)
+ cls.log.warning("%d shift/reduce conflicts", num_sr)
num_rr = len(lrtable.rr_conflicts)
- if num_rr != getattr(cls, 'expected_reduce_reduce', None):
+ if num_rr != getattr(cls, "expected_reduce_reduce", None):
if num_rr == 1:
- cls.log.warning('1 reduce/reduce conflict')
+ cls.log.warning("1 reduce/reduce conflict")
elif num_rr > 1:
- cls.log.warning('%d reduce/reduce conflicts', num_rr)
+ cls.log.warning("%d reduce/reduce conflicts", num_rr)
cls._lrtable = lrtable
return True
@classmethod
def __collect_rules(cls, definitions):
- '''
+ """
Collect all of the tagged grammar rules
- '''
- rules = [ (name, value) for name, value in definitions
- if callable(value) and hasattr(value, 'rules') ]
+ """
+ rules = [
+ (name, value)
+ for name, value in definitions
+ if callable(value) and hasattr(value, "rules")
+ ]
return rules
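The hasattr(value, "rules") test is what separates grammar rules from ordinary methods: the _() decorator that sly injects into Parser class bodies stows the rule strings on the function object itself. A rough, illustrative stand-in for that decorator (an approximation for exposition, not code from this diff):

    def _(*rules):
        def decorate(func):
            # Accumulate rule strings on the function; __collect_rules()
            # later picks up anything carrying a .rules attribute.
            func.rules = [*getattr(func, 'rules', []), *rules[::-1]]
            return func
        return decorate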
# ----------------------------------------------------------------------
@@ -1775,7 +1880,7 @@ class Parser(metaclass=ParserMeta):
# ----------------------------------------------------------------------
@classmethod
def _build(cls, definitions):
- if vars(cls).get('_build', False):
+ if vars(cls).get("_build", False):
return
# Collect all of the grammar rules from the class definition
@@ -1783,77 +1888,89 @@ class Parser(metaclass=ParserMeta):
# Validate other parts of the grammar specification
if not cls.__validate_specification():
- raise YaccError('Invalid parser specification')
+ raise YaccError("Invalid parser specification")
# Build the underlying grammar object
cls.__build_grammar(rules)
# Build the LR tables
if not cls.__build_lrtables():
- raise YaccError('Can\'t build parsing tables')
+ raise YaccError("Can't build parsing tables")
if cls.debugfile:
- with open(cls.debugfile, 'w') as f:
+ with open(cls.debugfile, "w") as f:
f.write(str(cls._grammar))
- f.write('\n')
+ f.write("\n")
f.write(str(cls._lrtable))
- cls.log.info('Parser debugging for %s written to %s', cls.__qualname__, cls.debugfile)
+ cls.log.info(
+ "Parser debugging for %s written to %s", cls.__qualname__, cls.debugfile
+ )
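Per the debugfile branch above, the usual way to inspect the grammar and any reported conflicts is to set the attribute on the class; the dump happens during class creation. Sketch (hypothetical names again):

    class DebugParser(Parser):
        debugfile = 'parser.out'      # grammar + LR table written here by _build()
        tokens = CalcLexer.tokens

        @_('NUMBER')
        def expr(self, p):
            return int(p.NUMBER)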
# ----------------------------------------------------------------------
    # Parsing Support. This is the parsing runtime that users use to run parsers.
# ----------------------------------------------------------------------
def error(self, token):
- '''
+ """
Default error handling function. This may be subclassed.
- '''
+ """
if token:
- lineno = getattr(token, 'lineno', 0)
+ lineno = getattr(token, "lineno", 0)
if lineno:
- sys.stderr.write(f'sly: Syntax error at line {lineno}, token={token.type}\n')
+ sys.stderr.write(
+ f"sly: Syntax error at line {lineno}, token={token.type}\n"
+ )
else:
- sys.stderr.write(f'sly: Syntax error, token={token.type}')
+            sys.stderr.write(f"sly: Syntax error, token={token.type}\n")
else:
- sys.stderr.write('sly: Parse error in input. EOF\n')
-
+ sys.stderr.write("sly: Parse error in input. EOF\n")
+
def errok(self):
- '''
+ """
Clear the error status
- '''
+ """
self.errorok = True
def restart(self):
- '''
+ """
Force the parser to restart from a fresh state. Clears the statestack
- '''
+ """
del self.statestack[:]
del self.symstack[:]
sym = YaccSymbol()
- sym.type = '$end'
+ sym.type = "$end"
self.symstack.append(sym)
self.statestack.append(0)
self.state = 0
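error(), errok(), and restart() above form the intended subclassing surface for recovery. A hedged sketch of the common pattern: report the offending token, then declare the error handled so parse() skips its quiet period of discarded tokens:

    import sys
    from sly import Parser

    class RecoveringParser(Parser):
        tokens = CalcLexer.tokens     # hypothetical lexer from the earlier note

        @_('NUMBER')
        def expr(self, p):
            return int(p.NUMBER)

        def error(self, token):
            if token is None:
                sys.stderr.write('sly: unexpected EOF\n')
                return
            sys.stderr.write(f'sly: discarding {token.type!r}\n')
            self.errok()              # resume normal reporting immediately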
def parse(self, tokens):
- '''
+ """
Parse the given input tokens.
- '''
- lookahead = None # Current lookahead symbol
- lookaheadstack = [] # Stack of lookahead symbols
- actions = self._lrtable.lr_action # Local reference to action table (to avoid lookup on self.)
- goto = self._lrtable.lr_goto # Local reference to goto table (to avoid lookup on self.)
- prod = self._grammar.Productions # Local reference to production list (to avoid lookup on self.)
- defaulted_states = self._lrtable.defaulted_states # Local reference to defaulted states
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
+ """
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [] # Stack of lookahead symbols
+ actions = (
+ self._lrtable.lr_action
+ ) # Local reference to action table (to avoid lookup on self.)
+ goto = (
+ self._lrtable.lr_goto
+ ) # Local reference to goto table (to avoid lookup on self.)
+ prod = (
+ self._grammar.Productions
+ ) # Local reference to production list (to avoid lookup on self.)
+ defaulted_states = (
+ self._lrtable.defaulted_states
+ ) # Local reference to defaulted states
+ pslice = YaccProduction(None) # Production object passed to grammar rules
+ errorcount = 0 # Used during error recovery
# Set up the state and symbol stacks
self.tokens = tokens
- self.statestack = statestack = [] # Stack of parsing states
- self.symstack = symstack = [] # Stack of grammar symbols
- pslice._stack = symstack # Associate the stack with the production
+ self.statestack = statestack = [] # Stack of parsing states
+ self.symstack = symstack = [] # Stack of grammar symbols
+ pslice._stack = symstack # Associate the stack with the production
self.restart()
- errtoken = None # Err token
+ errtoken = None # Err token
while True:
# Get the next symbol on the input. If a lookahead symbol
# is already set, we just use that. Otherwise, we'll pull
@@ -1866,7 +1983,7 @@ class Parser(metaclass=ParserMeta):
lookahead = lookaheadstack.pop()
if not lookahead:
lookahead = YaccSymbol()
- lookahead.type = '$end'
+ lookahead.type = "$end"
# Check the action table
ltype = lookahead.type
@@ -1892,14 +2009,14 @@ class Parser(metaclass=ParserMeta):
# reduce a symbol on the stack, emit a production
self.production = p = prod[-t]
pname = p.name
- plen = p.len
+ plen = p.len
pslice._namemap = p.namemap
# Call the production function
pslice._slice = symstack[-plen:] if plen else []
sym = YaccSymbol()
- sym.type = pname
+ sym.type = pname
value = p.func(self, pslice)
if value is pslice:
value = (pname, *(s.value for s in pslice._slice))
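The value is pslice test above enables a convenient shorthand: a rule that returns p unchanged reduces to a tuple of the rule name followed by its children's values, a cheap parse tree with no per-rule constructors. For instance:

    @_('expr PLUS NUMBER')
    def expr(self, p):
        return p      # reduces to ('expr', <expr value>, '+', <NUMBER value>)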
@@ -1915,7 +2032,7 @@ class Parser(metaclass=ParserMeta):
if t == 0:
n = symstack[-1]
- result = getattr(n, 'value', None)
+ result = getattr(n, "value", None)
return result
if t is None:
@@ -1932,8 +2049,8 @@ class Parser(metaclass=ParserMeta):
if errorcount == 0 or self.errorok:
errorcount = ERROR_COUNT
self.errorok = False
- if lookahead.type == '$end':
- errtoken = None # End of file!
+ if lookahead.type == "$end":
+ errtoken = None # End of file!
else:
errtoken = lookahead
@@ -1957,7 +2074,7 @@ class Parser(metaclass=ParserMeta):
# entire parse has been rolled back and we're completely hosed. The token is
# discarded and we just keep going.
- if len(statestack) <= 1 and lookahead.type != '$end':
+ if len(statestack) <= 1 and lookahead.type != "$end":
lookahead = None
self.state = 0
# Nuke the lookahead stack
@@ -1968,13 +2085,13 @@ class Parser(metaclass=ParserMeta):
# at the end of the file. nuke the top entry and generate an error token
# Start nuking entries on the stack
- if lookahead.type == '$end':
+ if lookahead.type == "$end":
# Whoa. We're really hosed here. Bail out
return
- if lookahead.type != 'error':
+ if lookahead.type != "error":
sym = symstack[-1]
- if sym.type == 'error':
+ if sym.type == "error":
# Hmmm. Error is on top of stack, we'll just nuke input
# symbol and continue
lookahead = None
@@ -1982,11 +2099,11 @@ class Parser(metaclass=ParserMeta):
# Create the error symbol for the first time and make it the new lookahead symbol
t = YaccSymbol()
- t.type = 'error'
+ t.type = "error"
- if hasattr(lookahead, 'lineno'):
+ if hasattr(lookahead, "lineno"):
t.lineno = lookahead.lineno
- if hasattr(lookahead, 'index'):
+ if hasattr(lookahead, "index"):
t.index = lookahead.index
t.value = lookahead
lookaheadstack.append(lookahead)
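The synthetic 'error' symbol assembled here is what grammars match to resynchronize after a failure. A sketch of the usual pattern (statement and SEMI are hypothetical names):

    @_('error SEMI')
    def statement(self, p):
        # Swallow input up to the next ';' after a syntax error,
        # then let normal parsing resume.
        return None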
@@ -1998,4 +2115,4 @@ class Parser(metaclass=ParserMeta):
continue
# Call an error function here
- raise RuntimeError('sly: internal parser error!!!\n')
+ raise RuntimeError("sly: internal parser error!!!\n")
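End to end, the runtime above is driven by handing parse() any iterable of tokens, most often straight from a lexer. A minimal sketch with the hypothetical classes from the earlier notes:

    lexer = CalcLexer()
    parser = CalcParser()
    result = parser.parse(lexer.tokenize('1 + 2 + 3'))
    print(result)     # 6 -- returned through the t == 0 accept path above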