diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2021-11-12 15:18:38 +0100 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2021-11-12 15:18:38 +0100 |
commit | ee0d122605120edfeff3837b8d358a1ef3ac5a3d (patch) | |
tree | 06028926edeeb747f0e76f38d9a602a26ad2f47d /bin | |
parent | 69d4a3e2c683dd1233e390804461fce9afce85cb (diff) |
add enum→boolean converter for CART/ModelTree evaluations
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/observations-enum-to-boolean.py | 42 |
1 files changed, 42 insertions, 0 deletions
#!/usr/bin/env python3
"""Convert enum (categorical) observation parameters to boolean parameters.

Usage: observations-enum-to-boolean.py <infile> <outfile>

<infile> is an LZMA-compressed JSON file holding a list of observations, each
of which is an object with a "param" mapping.  Every parameter that takes more
than two distinct values, at least one of which is not numeric, is replaced by
one 0/1 parameter per distinct value.  The result is written to <outfile> as
LZMA-compressed JSON.
"""

import json
import lzma
import sys

import dfatool.utils


def _find_enum_params(observations):
    """Return the parameters that must be converted and their distinct values.

    :param observations: list of dicts, each with a "param" dict.
    :returns: dict mapping parameter name to the list of distinct values seen
        for it (in order of first appearance).  Only parameters with more
        than two distinct values, not all of which are numeric according to
        dfatool.utils.is_numeric, are included.
    """
    distinct_param_values = dict()
    for observation in observations:
        for name, value in observation["param"].items():
            # A dict is used as an insertion-ordered set: unlike set(), its
            # iteration order does not depend on string hash randomisation,
            # so the key order of the generated output is reproducible.
            distinct_param_values.setdefault(name, dict())[value] = None

    return {
        name: list(values)
        for name, values in distinct_param_values.items()
        if len(values) > 2 and not all(map(dfatool.utils.is_numeric, values))
    }


def _enum_to_boolean(observations):
    """Replace enum parameters by one boolean (0/1) parameter per enum value.

    `observations` is altered in-place.  For each enum parameter present in
    an observation, the parameter is removed and one key per distinct value
    is added; the key matching the observation's value is set to 1, all
    others are set to 0.

    NOTE(review): the generated keys are the raw enum values.  If a value is
    equal to another parameter's name, or two enum parameters share a value,
    the entries overwrite each other.  The naming is kept as-is so that the
    output format does not change.

    :param observations: list of dicts, each with a "param" dict.
    """
    replace_map = _find_enum_params(observations)

    for observation in observations:
        param = observation["param"]
        for name, values in replace_map.items():
            if name not in param:
                # Not every observation necessarily carries every parameter.
                # There is no enum value to encode here, so leave this
                # observation untouched rather than failing with KeyError.
                continue
            enum_value = param.pop(name)
            for binary_key in values:
                param[binary_key] = int(enum_value == binary_key)


def main():
    """Read observations from argv[1], convert them, write them to argv[2]."""
    if len(sys.argv) < 3:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Usage: {sys.argv[0]} <infile> <outfile>")

    infile = sys.argv[1]
    outfile = sys.argv[2]

    with lzma.open(infile, "rt") as f:
        observations = json.load(f)

    _enum_to_boolean(observations)

    with lzma.open(outfile, "wt") as f:
        json.dump(observations, f)


if __name__ == "__main__":
    main()