add legacy genetic programming (symbolic regression) benchmark

author: Daniel Friesel <daniel.friesel@uos.de> 2019-10-08 08:22:50 +0200
committer: Daniel Friesel <daniel.friesel@uos.de> 2019-10-08 08:22:50 +0200
commit: 30926c1b1625978b66179b01978bebd3d3243623 (patch)
tree: 1a3f4c88be9bba7c2e6ac4f95ceede8e6fd82960 /bin
parent: 3a02d19162975b619ea11259ce763c1902837b81 (diff)
1 files changed, 88 insertions, 0 deletions
diff --git a/bin/gptest.py b/bin/gptest.py
new file mode 100755
index 0000000..dd29a2a
--- /dev/null
+++ b/bin/gptest.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+
+import sys
+import numpy as np
+from dfatool import PTAModel, RawData, regression_measures, pta_trace_to_aggregate
+from gplearn.genetic import SymbolicRegressor
+from multiprocessing import Pool
+
+def splitidx_srs(length):
+    shuffled = np.random.permutation(np.arange(length))
+    border = int(length * float(2) / 3)
+    training = shuffled[:border]
+    validation = shuffled[border:]
+    return (training, validation)
+
+def _gp_fit(arg):
+    param = arg[0]
+    X = arg[1]
+    Y = arg[2]
+    est_gp = SymbolicRegressor(
+        population_size = param[0],
+        generations = 450,
+        parsimony_coefficient = param[1],
+        function_set = param[2].split(' '),
+        const_range = (-param[3], param[3])
+    )
+
+    training, validation = splitidx_srs(len(Y))
+    X_train = X[training]
+    Y_train = Y[training]
+    X_validation = X[validation]
+    Y_validation = Y[validation]
+
+    try:
+        est_gp.fit(X_train, Y_train)
+        return (param, str(est_gp._program), est_gp._program.raw_fitness_, regression_measures(est_gp.predict(X_validation), Y_validation))
+    except Exception as e:
+        return (param, 'Exception: {}'.format(str(e)), 999999999)
+
+
+if __name__ == '__main__':
+    population_size = [100, 500, 1000, 2000, 5000, 10000]
+    parsimony_coefficient = [0.1, 0.5, 0.1, 1]
+    function_set = ['add mul', 'add mul sub div', 'add mul sub div sqrt log inv']
+    const_lim = [100000, 50000, 10000, 1000, 500, 10, 1]
+    filenames = sys.argv[4:]
+    raw_data = RawData(filenames)
+
+    preprocessed_data = raw_data.get_preprocessed_data()
+    by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)
+    model = PTAModel(by_name, parameters, arg_count, traces = preprocessed_data)
+
+    by_param = model.by_param
+
+    state_or_tran = sys.argv[1]
+
+    model_attribute = sys.argv[2]
+
+    dimension = int(sys.argv[3])
+
+    X = [[] for i in range(dimension)]
+    Y = []
+
+
+    for key, val in by_param.items():
+        if key[0] == state_or_tran and len(key[1]) == dimension:
+            Y.extend(val[model_attribute])
+            for i in range(dimension):
+                X[i].extend([float(key[1][i])] * len(val[model_attribute]))
+
+
+    X = np.array(X)
+    Y = np.array(Y)
+
+    paramqueue = []
+
+    for popsize in population_size:
+        for coef in parsimony_coefficient:
+            for fs in function_set:
+                for cl in const_lim:
+                    for i in range(10):
+                        paramqueue.append(((popsize, coef, fs, cl), X.T, Y))
+
+    with Pool() as pool:
+        results = pool.map(_gp_fit, paramqueue)
+
+    for res in sorted(results, key=lambda r: r[2]):
+        print('{} {:.0f} ({:.0f})\n{}'.format(res[0], res[3]['mae'], res[2], res[1]))
author	Daniel Friesel <daniel.friesel@uos.de>	2019-10-08 08:22:50 +0200
committer	Daniel Friesel <daniel.friesel@uos.de>	2019-10-08 08:22:50 +0200
commit	30926c1b1625978b66179b01978bebd3d3243623 (patch)
tree	1a3f4c88be9bba7c2e6ac4f95ceede8e6fd82960 /bin
parent	3a02d19162975b619ea11259ce763c1902837b81 (diff)