7 files changed, 384 insertions, 124 deletions
diff --git a/lib/kconfig.py b/lib/kconfig.py
new file mode 100644
index 0000000..6ae947a
--- /dev/null
+++ b/lib/kconfig.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+
+import kconfiglib
+import logging
+import re
+import shutil
+import subprocess
+
+from versuchung.experiment import Experiment
+from versuchung.types import String, Bool, Integer
+from versuchung.files import File, Directory
+
+logger = logging.getLogger(__name__)
+
+
+class AttributeExperiment(Experiment):
+    """Build one configuration of a Kconfig project and record its attributes.
+
+    Subclasses declare the inputs used by run(): project_root and the
+    clean / build / attr commands (whitespace-separated command lines).
+    """
+
+    outputs = {
+        "config": File(".config"),
+        "attributes": File("attributes.json"),
+        "build_out": File("build.out"),
+        "build_err": File("build.err"),
+    }
+
+    def run(self):
+        """Clean and build the project; store the attr_command output unless the build fails."""
+        root = self.project_root.path
+        clean_command = self.clean_command.value.split()
+        build_command = self.build_command.value.split()
+        attr_command = self.attr_command.value.split()
+        shutil.copyfile(f"{root}/.config", self.config.path)
+        # check_call() never drains a PIPE, so a chatty "clean" could block: use DEVNULL.
+        devnull = subprocess.DEVNULL
+        subprocess.check_call(clean_command, cwd=root, stdout=devnull, stderr=devnull)
+        try:
+            with open(self.build_out.path, "w") as out_fd:
+                with open(self.build_err.path, "w") as err_fd:
+                    subprocess.check_call(
+                        build_command, cwd=root, stdout=out_fd, stderr=err_fd
+                    )
+        except subprocess.CalledProcessError:
+            logger.info("build error")
+            return
+        with open(self.attributes.path, "w") as attr_fd:
+            subprocess.check_call(attr_command, cwd=root, stdout=attr_fd)
+
+
+class RandomConfig(AttributeExperiment):  # experiment for one "make randconfig" configuration
+    inputs = {
+        "randconfig_seed": String("FIXME"),  # KConfig.run_randconfig passes the KCONFIG_SEED
+        "kconfig_hash": String("FIXME"),  # KConfig passes the sha256sum of the Kconfig file
+        "project_root": Directory("/tmp"),  # directory in which the commands are run
+        "project_version": String("FIXME"),  # KConfig passes "git rev-parse HEAD" output
+        "clean_command": String("make clean"),
+        "build_command": String("make"),  # its stdout / stderr go to build.out / build.err
+        "attr_command": String("make attributes"),  # its stdout goes to attributes.json
+    }
+
+
+class ExploreConfig(AttributeExperiment):  # experiment for one explicitly given configuration
+    inputs = {
+        "config_hash": String("FIXME"),  # KConfig passes the sha256sum of the built config file
+        "kconfig_hash": String("FIXME"),  # KConfig passes the sha256sum of the Kconfig file
+        "project_root": Directory("/tmp"),  # directory in which the commands are run
+        "project_version": String("FIXME"),  # KConfig passes "git rev-parse HEAD" output
+        "clean_command": String("make clean"),
+        "build_command": String("make"),  # its stdout / stderr go to build.out / build.err
+        "attr_command": String("make attributes"),  # its stdout goes to attributes.json
+    }
+
+
+class KConfig:
+    """Run build experiments for a Kconfig-based project.
+
+    Helper commands (make, git, sha256sum) run inside the project's working directory.
+    """
+
+    def __init__(self, working_directory):
+        """Create an experiment runner for the project in working_directory.
+
+        :param working_directory: project root; Kconfig and .config are looked up here
+        """
+        self.cwd = working_directory
+        self.clean_command = "make clean"
+        self.build_command = "make"
+        self.attribute_command = "make attributes"
+
+    def _capture(self, command):
+        """Run command in the project directory and capture its output as text.
+
+        :param command: argument list passed to subprocess.run
+        :returns: CompletedProcess whose stdout and stderr are strings
+        """
+        return subprocess.run(
+            command,
+            cwd=self.cwd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
+        )
+
+    def randconfig(self):
+        """Run "make randconfig" and return the seed of the generated configuration.
+
+        :returns: value of the last KCONFIG_SEED=... line printed to stderr
+        :raises RuntimeError: if stderr contains no non-empty KCONFIG_SEED
+        """
+        status = self._capture(["make", "randconfig"])
+
+        # make randconfig occasionally generates illegal configurations, so a project may run randconfig more than once.
+        # Make sure to return the seed of the latest run (don't short-circuit).
+        seed = None
+        for line in status.stderr.split("\n"):
+            match = re.match("KCONFIG_SEED=(.*)", line)
+            if match:
+                seed = match.group(1)
+        if seed:
+            return seed
+        raise RuntimeError("KCONFIG_SEED not found")
+
+    def git_commit_id(self):
+        """Return the commit ID printed by "git rev-parse HEAD" for the project.
+
+        The exit status is not checked: the result is empty if git printed nothing.
+        """
+        return self._capture(["git", "rev-parse", "HEAD"]).stdout.strip()
+
+    def file_hash(self, config_file):
+        """Return the SHA256 checksum of config_file, as printed by sha256sum.
+
+        :param config_file: file name; sha256sum runs inside the project directory
+        :raises RuntimeError: if sha256sum fails, e.g. because it cannot read the file
+        """
+        status = self._capture(["sha256sum", config_file])
+        if status.returncode != 0:
+            raise RuntimeError(f"sha256sum failed: {status.stderr.strip()}")
+        return status.stdout.split()[0]
+
+    def _run_experiment(self, experiment, id_option, id_value):
+        """Run experiment with the project-wide inputs plus one identifying option.
+
+        :param experiment: RandomConfig or ExploreConfig instance
+        :param id_option: "--randconfig_seed" or "--config_hash"
+        :param id_value: value passed for id_option
+        """
+        experiment(
+            [
+                id_option,
+                id_value,
+                "--kconfig_hash",
+                self.file_hash(f"{self.cwd}/Kconfig"),
+                "--project_version",
+                self.git_commit_id(),
+                "--project_root",
+                self.cwd,
+                "--clean_command",
+                self.clean_command,
+                "--build_command",
+                self.build_command,
+                "--attr_command",
+                self.attribute_command,
+            ]
+        )
+
+    def run_randconfig(self):
+        """Run a randomconfig experiment in the selected project. Results are written to the current working directory."""
+        self._run_experiment(RandomConfig(), "--randconfig_seed", self.randconfig())
+
+    def config_is_functional(self, kconf):
+        """Check whether every mandatory choice in kconf has a selected symbol.
+
+        :returns: False if a non-optional choice that may be set to y has no selection
+        """
+        for choice in kconf.choices:
+            if (
+                not choice.is_optional
+                and 2 in choice.assignable
+                and choice.selection is None
+            ):
+                return False
+        return True
+
+    def run_exploration_from_file(self, config_file):
+        """Build config_file, then every functional variant that toggles a single bool symbol.
+
+        :param config_file: configuration to start from; it is copied to the project's .config
+        """
+        kconfig_file = f"{self.cwd}/Kconfig"
+        kconf = kconfiglib.Kconfig(kconfig_file)
+        kconf.load_config(config_file)
+
+        shutil.copyfile(config_file, f"{self.cwd}/.config")
+        baseline_hash = self.file_hash(config_file)
+        self._run_experiment(ExploreConfig(), "--config_hash", baseline_hash)
+
+        for symbol in kconf.syms.values():
+            if symbol.type != kconfiglib.BOOL:
+                continue
+            if symbol.tri_value == 0 and 2 in symbol.assignable:
+                logger.debug(f"Set {symbol.name} to y")
+                symbol.set_value(2)
+            elif symbol.tri_value == 2 and 0 in symbol.assignable:
+                logger.debug(f"Set {symbol.name} to n")
+                symbol.set_value(0)
+            else:
+                continue
+
+            if not self.config_is_functional(kconf):
+                logger.debug("Configuration is non-functional")
+                kconf.load_config(config_file)
+                continue
+
+            kconf.write_config(f"{self.cwd}/.config")
+            self._run_experiment(
+                ExploreConfig(), "--config_hash", self.file_hash(f"{self.cwd}/.config")
+            )
+            kconf.load_config(config_file)
diff --git a/lib/kconfiglib.py b/lib/kconfiglib.py
new file mode 120000
index 0000000..5b2f9ac
--- /dev/null
+++ b/lib/kconfiglib.py
@@ -0,0 +1 @@
+../ext/kconfiglib/kconfiglib.py
+\ No newline at end of file
diff --git a/lib/loader.py b/lib/loader.py
index ea2b183..fcd5490 100644
--- a/lib/loader.py
+++ b/lib/loader.py
@@ -489,7 +489,7 @@ class RawData:
             if sorted(online_trace_part["parameter"].keys()) != self._parameter_names:
                 processed_data[
                     "error"
-                ] = "Offline #{off_idx:d} (online {on_name:s} @ {on_idx:d}/{on_sub:d}) has inconsistent parameter set: should be {param_want:s}, is {param_is:s}".format(
+                ] = "Offline #{off_idx:d} (online {on_name:s} @ {on_idx:d}/{on_sub:d}) has inconsistent parameter set: should be {param_want}, is {param_is}".format(
                     off_idx=offline_idx,
                     on_idx=online_run_idx,
                     on_sub=online_trace_part_idx,
diff --git a/lib/model.py b/lib/model.py
index e908af4..bb4a45b 100644
--- a/lib/model.py
+++ b/lib/model.py
@@ -5,6 +5,7 @@ import numpy as np
 from scipy import optimize
 from sklearn.metrics import r2_score
 from multiprocessing import Pool
+from .automata import PTA
 from .functions import analytic
 from .functions import AnalyticFunction
 from .parameters import ParamStats
@@ -700,7 +701,6 @@ class PTAModel:
         arg_count,
         traces=[],
         ignore_trace_indexes=[],
-        discard_outliers=None,
         function_override={},
         use_corrcoef=False,
         pta=None,
@@ -716,13 +716,6 @@ class PTAModel:
         arg_count -- function arguments, as returned by pta_trace_to_aggregate
         traces -- list of preprocessed DFA traces, as returned by RawData.get_preprocessed_data()
         ignore_trace_indexes -- list of trace indexes. The corresponding traces will be ignored.
-        discard_outliers -- currently not supported: threshold for outlier detection and removel (float).
-            Outlier detection is performed individually for each state/transition in each trace,
-            so it only works if the benchmark ran several times.
-            Given "data" (a set of measurements of the same thing, e.g. TX duration in the third benchmark trace),
-            "m" (the median of all attribute measurements with the same parameters, which may include data from other traces),
-            a data point X is considered an outlier if
-            | 0.6745 * (X - m) / median(|data - m|) | > discard_outliers .
         function_override -- dict of overrides for automatic parameter function generation.
             If (state or transition name, model attribute) is present in function_override,
             the corresponding text string is the function used for analytic (parameter-aware/fitted)
@@ -749,7 +742,6 @@ class PTAModel:
         )
         self.cache = {}
         np.seterr("raise")
-        self._outlier_threshold = discard_outliers
         self.function_override = function_override.copy()
         self.pta = pta
         self.ignore_trace_indexes = ignore_trace_indexes
@@ -940,13 +932,16 @@ class PTAModel:
         static_quality = self.assess(static_model)
         param_model, param_info = self.get_fitted()
         analytic_quality = self.assess(param_model)
-        self.pta.update(
+        pta = self.pta
+        if pta is None:
+            pta = PTA(self.states(), parameters=self._parameter_names)
+        pta.update(
             static_model,
             param_info,
             static_error=static_quality["by_name"],
             analytic_error=analytic_quality["by_name"],
         )
-        return self.pta.to_json()
+        return pta.to_json()
 
     def states(self):
         """Return sorted list of state names."""
diff --git a/lib/parameters.py b/lib/parameters.py
index 81649f2..5c6b978 100644
--- a/lib/parameters.py
+++ b/lib/parameters.py
@@ -250,6 +250,8 @@ def _compute_param_statistics(
     corr_by_param -- correlation coefficient
     corr_by_arg -- same, but ignoring a single function argument
         Only set if state_or_trans appears in arg_count, empty dict otherwise.
+    depends_on_param -- dict(parameter_name -> Bool). True if /attribute/ behaviour probably depends on /parameter_name/
+    depends_on_arg -- list(bool). Same, but for function arguments, if any.
     """
     ret = {
         "std_static": np.std(by_name[state_or_trans][attribute]),
@@ -270,7 +272,6 @@ def _compute_param_statistics(
         "corr_by_arg": [],
         "depends_on_param": {},
         "depends_on_arg": [],
-        "param_data": {},
     }
 
     np.seterr("raise")
diff --git a/lib/runner.py b/lib/runner.py
index 4cab9ed..96627cf 100644
--- a/lib/runner.py
+++ b/lib/runner.py
@@ -340,113 +340,157 @@ class ShellMonitor:
         pass
 
 
-def build(arch, app, opts=[]):
-    command = ["make", "arch={}".format(arch), "app={}".format(app), "clean"]
-    command.extend(opts)
-    res = subprocess.run(
-        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
-    )
-    if res.returncode != 0:
-        raise RuntimeError(
-            "Build failure, executing {}:\n".format(command) + res.stderr
+class Arch:
+    def __init__(self, name, opts=list()):
+        """Store a private copy of opts (extra make arguments) and cache the "make info" output."""
+        self.name, self.opts = name, list(opts)
+        self.info = self.get_info()
+
+    def build(self, app, opts=list()):
+        command = ["make", "arch={}".format(self.name), "app={}".format(app), "clean"]
+        command.extend(self.opts)
+        command.extend(opts)
+        res = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
-    command = ["make", "-B", "arch={}".format(arch), "app={}".format(app)]
-    command.extend(opts)
-    res = subprocess.run(
-        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
-    )
-    if res.returncode != 0:
-        raise RuntimeError(
-            "Build failure, executing {}:\n ".format(command) + res.stderr
+        if res.returncode != 0:
+            raise RuntimeError(
+                "Build failure, executing {}:\n".format(command) + res.stderr
+            )
+        command = ["make", "-B", "arch={}".format(self.name), "app={}".format(app)]
+        command.extend(self.opts)
+        command.extend(opts)
+        res = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
-    return command
-
-
-def flash(arch, app, opts=[]):
-    command = ["make", "arch={}".format(arch), "app={}".format(app), "program"]
-    command.extend(opts)
-    res = subprocess.run(
-        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
-    )
-    if res.returncode != 0:
-        raise RuntimeError("Flash failure")
-    return command
+        if res.returncode != 0:
+            raise RuntimeError(
+                "Build failure, executing {}:\n ".format(command) + res.stderr
+            )
+        return command
 
+    def flash(self, app, opts=list()):
+        """Run "make ... program" for app and return the command; RuntimeError (with stderr) on failure."""
+        command = ["make", f"arch={self.name}", f"app={app}", "program"]
+        command += [*self.opts, *opts]
+        res = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
+        )
+        if res.returncode != 0:
+            raise RuntimeError(f"Flash failure, executing {command}:\n" + res.stderr)
+        return command
 
-def get_info(arch, opts: list = []) -> list:
-    """
-    Return multipass "make info" output.
+    def get_info(self, opts=list()) -> list:
+        """
+        Return multipass "make info" output.
 
-    Returns a list.
-    """
-    command = ["make", "arch={}".format(arch), "info"]
-    command.extend(opts)
-    res = subprocess.run(
-        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
-    )
-    if res.returncode != 0:
-        raise RuntimeError("make info Failure")
-    return res.stdout.split("\n")
+        Returns a list.
+        """
+        command = ["make", "arch={}".format(self.name), "info"]
+        command.extend(self.opts)
+        command.extend(opts)
+        res = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
+        )
+        if res.returncode != 0:
+            raise RuntimeError("make info Failure")
+        return res.stdout.split("\n")
 
+    def _cached_info(self, opts=list()) -> list:
+        """Return the "make info" lines: cached if opts is empty, freshly queried otherwise."""
+        use_cache = not len(opts)
+        return self.info if use_cache else self.get_info(opts)
 
-def get_monitor(arch: str, **kwargs) -> object:
-    """
-    Return an appropriate monitor for arch, depending on "make info" output.
+    def get_monitor(self, **kwargs) -> object:
+        """
+        Return an appropriate monitor for arch, depending on "make info" output.
 
-    Port and Baud rate are taken from "make info".
+        Port and Baud rate are taken from "make info".
 
-    :param arch: architecture name, e.g. 'msp430fr5994lp' or 'posix'
-    :param energytrace: `EnergyTraceMonitor` options. Returns an EnergyTrace monitor if not None.
-    :param mimosa: `MIMOSAMonitor` options. Returns a MIMOSA monitor if not None.
-    """
-    for line in get_info(arch):
-        if "Monitor:" in line:
-            _, port, arg = line.split(" ")
-            if port == "run":
-                return ShellMonitor(arg, **kwargs)
-            elif "mimosa" in kwargs and kwargs["mimosa"] is not None:
-                mimosa_kwargs = kwargs.pop("mimosa")
-                return MIMOSAMonitor(port, arg, **mimosa_kwargs, **kwargs)
-            elif "energytrace" in kwargs and kwargs["energytrace"] is not None:
-                energytrace_kwargs = kwargs.pop("energytrace").copy()
-                sync_mode = energytrace_kwargs.pop("sync")
-                if sync_mode == "la":
-                    return EnergyTraceLogicAnalyzerMonitor(
-                        port, arg, **energytrace_kwargs, **kwargs
-                    )
+        :param energytrace: `EnergyTraceMonitor` options. Returns an EnergyTrace monitor if not None.
+        :param mimosa: `MIMOSAMonitor` options. Returns a MIMOSA monitor if not None.
+        """
+        for line in self.info:
+            if "Monitor:" in line:
+                _, port, arg = line.split(" ")
+                if port == "run":
+                    return ShellMonitor(arg, **kwargs)
+                elif "mimosa" in kwargs and kwargs["mimosa"] is not None:
+                    mimosa_kwargs = kwargs.pop("mimosa")
+                    return MIMOSAMonitor(port, arg, **mimosa_kwargs, **kwargs)
+                elif "energytrace" in kwargs and kwargs["energytrace"] is not None:
+                    energytrace_kwargs = kwargs.pop("energytrace").copy()
+                    sync_mode = energytrace_kwargs.pop("sync")
+                    if sync_mode == "la":
+                        return EnergyTraceLogicAnalyzerMonitor(
+                            port, arg, **energytrace_kwargs, **kwargs
+                        )
+                    else:
+                        return EnergyTraceMonitor(
+                            port, arg, **energytrace_kwargs, **kwargs
+                        )
                 else:
-                    return EnergyTraceMonitor(port, arg, **energytrace_kwargs, **kwargs)
+                    kwargs.pop("energytrace", None)
+                    kwargs.pop("mimosa", None)
+                    return SerialMonitor(port, arg, **kwargs)
+        raise RuntimeError("Monitor failure")
+
+    def get_counter_limits(self, opts=list()) -> tuple:
+        """Return (overflow_value, max_overflow) from the first "Counter Overflow: a/b" info line.
+
+        Raises RuntimeError if "make info" reports no such line.
+        """
+        pattern = re.compile("Counter Overflow: ([^/]*)/(.*)")
+        for hit in filter(None, map(pattern.match, self._cached_info(opts))):
+            return int(hit.group(1)), int(hit.group(2))
+        raise RuntimeError("Did not find Counter Overflow limits")
+
+    def sleep_ms(self, duration: int, opts=list()) -> str:
+        """Return a C/C++ snippet sleeping for duration ms, split into chunks on msp430fr archs."""
+        max_sleep = None
+        if "msp430fr" in self.name:
+            cpu_freq = None
+            for line in self._cached_info(opts):
+                match = re.match(r"CPU\s+Freq:\s+(.*)\s+Hz", line)
+                cpu_freq = int(match.group(1)) if match else cpu_freq
+            if cpu_freq is not None and cpu_freq > 8000000:
+                max_sleep = 250
             else:
-                kwargs.pop("energytrace", None)
-                kwargs.pop("mimosa", None)
-                return SerialMonitor(port, arg, **kwargs)
-    raise RuntimeError("Monitor failure")
-
-
-def get_counter_limits(arch: str) -> tuple:
-    """Return multipass max counter and max overflow value for arch."""
-    for line in get_info(arch):
-        match = re.match("Counter Overflow: ([^/]*)/(.*)", line)
-        if match:
-            overflow_value = int(match.group(1))
-            max_overflow = int(match.group(2))
-            return overflow_value, max_overflow
-    raise RuntimeError("Did not find Counter Overflow limits")
-
-
-def get_counter_limits_us(arch: str) -> tuple:
-    """Return duration of one counter step and one counter overflow in us."""
-    cpu_freq = 0
-    overflow_value = 0
-    max_overflow = 0
-    for line in get_info(arch):
-        match = re.match(r"CPU\s+Freq:\s+(.*)\s+Hz", line)
-        if match:
-            cpu_freq = int(match.group(1))
-        match = re.match(r"Counter Overflow:\s+([^/]*)/(.*)", line)
-        if match:
-            overflow_value = int(match.group(1))
-            max_overflow = int(match.group(2))
-    if cpu_freq and overflow_value:
-        return 1000000 / cpu_freq, overflow_value * 1000000 / cpu_freq, max_overflow
-    raise RuntimeError("Did not find Counter Overflow limits")
+                max_sleep = 500
+        if max_sleep is not None and duration > max_sleep:
+            sub_sleep_count, tail_sleep = divmod(duration, max_sleep)
+            # With an 8-bit unsigned char counter, "i < N" stays true forever once N > 255: widen it.
+            counter = "unsigned char" if sub_sleep_count < 256 else "unsigned long"
+            ret = f"for ({counter} i = 0; i < {sub_sleep_count}; i++) {{ arch.sleep_ms({max_sleep}); }}\n"
+            tail = f"arch.sleep_ms({tail_sleep});\n" if tail_sleep > 0 else ""
+            return ret + tail
+        return f"arch.sleep_ms({duration});\n"
+
+    def get_counter_limits_us(self, opts=list()) -> tuple:
+        """Return (us per counter step, us per counter overflow, max_overflow).
+
+        Raises RuntimeError if the CPU frequency or the overflow value is missing or zero.
+        """
+        cpu_freq = overflow_value = max_overflow = 0
+        for line in self._cached_info(opts):
+            freq_match = re.match(r"CPU\s+Freq:\s+(.*)\s+Hz", line)
+            if freq_match:
+                cpu_freq = int(freq_match.group(1))
+            limit_match = re.match(r"Counter Overflow:\s+([^/]*)/(.*)", line)
+            if limit_match:
+                overflow_value, max_overflow = map(int, limit_match.groups())
+        if not (cpu_freq and overflow_value):
+            raise RuntimeError("Did not find Counter Overflow limits")
+        return 1000000 / cpu_freq, overflow_value * 1000000 / cpu_freq, max_overflow
diff --git a/lib/validation.py b/lib/validation.py
index 98d49c1..ee147fe 100644
--- a/lib/validation.py
+++ b/lib/validation.py
@@ -179,6 +179,7 @@ class CrossValidator:
             for attribute in self.by_name[name]["attributes"]:
                 ret["by_name"][name][attribute] = {
                     "mae_list": list(),
+                    "rmsd_list": list(),
                     "smape_list": list(),
                 }
 
@@ -186,21 +187,17 @@ class CrossValidator:
             res = self._single_xv(model_getter, training_and_validation_by_name)
             for name in self.names:
                 for attribute in self.by_name[name]["attributes"]:
-                    ret["by_name"][name][attribute]["mae_list"].append(
-                        res["by_name"][name][attribute]["mae"]
-                    )
-                    ret["by_name"][name][attribute]["smape_list"].append(
-                        res["by_name"][name][attribute]["smape"]
-                    )
+                    for measure in ("mae", "rmsd", "smape"):
+                        ret["by_name"][name][attribute][f"{measure}_list"].append(
+                            res["by_name"][name][attribute][measure]
+                        )
 
         for name in self.names:
             for attribute in self.by_name[name]["attributes"]:
-                ret["by_name"][name][attribute]["mae"] = np.mean(
-                    ret["by_name"][name][attribute]["mae_list"]
-                )
-                ret["by_name"][name][attribute]["smape"] = np.mean(
-                    ret["by_name"][name][attribute]["smape_list"]
-                )
+                for measure in ("mae", "rmsd", "smape"):
+                    ret["by_name"][name][attribute][measure] = np.mean(
+                        ret["by_name"][name][attribute][f"{measure}_list"]
+                    )
 
         return ret