summaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2021-11-12 15:18:38 +0100
committer Daniel Friesel <daniel.friesel@uos.de> 2021-11-12 15:18:38 +0100
commit ee0d122605120edfeff3837b8d358a1ef3ac5a3d (patch)
tree 06028926edeeb747f0e76f38d9a602a26ad2f47d /bin
parent 69d4a3e2c683dd1233e390804461fce9afce85cb (diff)
add enum→boolean converter for CART/ModelTree evaluations
Diffstat (limited to 'bin')
-rwxr-xr-x bin/observations-enum-to-boolean.py 42
1 files changed, 42 insertions, 0 deletions
diff --git a/bin/observations-enum-to-boolean.py b/bin/observations-enum-to-boolean.py
new file mode 100755
index 0000000..fc7807c
--- /dev/null
+++ b/bin/observations-enum-to-boolean.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+import dfatool.utils
+import json
+import lzma
+import sys
+
+
def main():
    """Convert enum-like observation parameters into boolean parameters.

    Reads an LZMA-compressed JSON list of observations from ``sys.argv[1]``
    and writes the converted list, again as LZMA-compressed JSON, to
    ``sys.argv[2]``. Each observation must have a ``"param"`` dict.

    A parameter is treated as an enum when, across all observations, it takes
    more than two distinct values and at least one of them is not numeric
    according to ``dfatool.utils.is_numeric``. In every observation that
    carries such a parameter, the parameter is removed and replaced by one
    entry per distinct value, keyed by that value, which is 1 if the
    observation had that value and 0 otherwise.

    Exits with a usage message if fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <infile> <outfile>")
    infile, outfile = sys.argv[1:3]

    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with lzma.open(infile, "rt", encoding="utf-8") as f:
        observations = json.load(f)

    # Collect the set of values each parameter takes over all observations.
    distinct_param_values = {}
    for observation in observations:
        for k, v in observation["param"].items():
            distinct_param_values.setdefault(k, set()).add(v)

    # Parameters with at most two values, or with purely numeric values, are
    # left alone; everything else is considered an enum.
    replace_map = {
        param_name: distinct_values
        for param_name, distinct_values in distinct_param_values.items()
        if len(distinct_values) > 2
        and not all(map(dfatool.utils.is_numeric, distinct_values))
    }

    # NOTE(review): the generated keys are the raw enum values. If two enum
    # parameters share a value, or a value equals an existing parameter name,
    # the entries overwrite each other.
    for observation in observations:
        param = observation["param"]
        for k, v in replace_map.items():
            if k not in param:
                # Observations need not all carry the same parameters; leave
                # those without this enum untouched instead of raising.
                continue
            enum_value = param.pop(k)
            for binary_key in v:
                param[binary_key] = int(enum_value == binary_key)

    with lzma.open(outfile, "wt", encoding="utf-8") as f:
        json.dump(observations, f)
+
+
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()