summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Friesel <derf@finalrewind.org> 2020-12-27 05:55:04 +0100
committer Daniel Friesel <derf@finalrewind.org> 2020-12-27 05:55:04 +0100
commit 13a6832bb4f9ec68d26ae59c8dd1260279e3e70a (patch)
tree 1262192a82a5beeeffd287dbdf0d535fe66df974
parent b12626394101807d2644b92ffb50dcae2b0febfa (diff)
heuristics, whoop whoop
-rwxr-xr-x convert.zsh 46
-rwxr-xr-x lib/export-carriage.py 90
2 files changed, 121 insertions, 15 deletions
diff --git a/convert.zsh b/convert.zsh
index eb5bb25..c37a208 100755
--- a/convert.zsh
+++ b/convert.zsh
@@ -4,11 +4,7 @@ setopt err_exit no_unset
mkdir -p svg pdf png
-# bbox: left bottom right top
-
# S. 6, 11ff
-ice1_bbox='78 45 830 198'
-#ice1_bbox='95 59 825 150' # doesn't work for 8029, 8031
ice1_offset=11
# Stelle 6 bis 9 der UIC-Wagennr.
typeset -a ice1_types=(
@@ -253,7 +249,7 @@ x
1101 # DBpdzfa # 110.A
)
-function extract_wagons() {
+function extract_wagons {
start=$1
shift
bbox=$1
@@ -301,18 +297,38 @@ function extract_wagons() {
done
}
# Export one image per listed carriage type, letting lib/export-carriage.py
# work out the crop area instead of relying on a hand-tuned bounding box.
#
# Usage: extract_wagons_heuristic <first-page> <type>...
#
# The n-th type belongs to page <first-page> + n - 1 and is read from
# tmp-<page>.pdf in the current directory. A type given as "x" is skipped,
# but still consumes its page number. Results are written to svg/<type>.svg
# and png/<type>.png.
function extract_wagons_heuristic {
	local start=$1
	shift

	local i page target

	# A C-style loop is used on purpose: with no types left, {1..$#} would
	# expand to "1 0" and run the body on non-existent entries.
	for (( i = 1; i <= $#; i++ )); do
		target=$@[$i]

		if [[ $target == x ]]; then
			continue
		fi

		page=$(( start + i - 1 ))

		echo "Page ${page}: $target"
		inkscape --export-filename=svg/${target}.svg tmp-${page}.pdf &> /dev/null
		lib/export-carriage.py svg/${target}.svg png/${target}.png
	done
}
+
pdfseparate Fahrzeuglexikon_2020.pdf tmp-%d.pdf
-extract_wagons $ice1_offset $ice1_bbox $ice1_types
-extract_wagons $ice2_offset $ice2_bbox $ice2_types
-extract_wagons $ice3_403_1_offset $ice3_403_1_bbox $ice3_403_1_types
-extract_wagons $ice3_403_2_offset $ice3_403_2_bbox $ice3_403_2_types
-extract_wagons $ice3_403_r_offset $ice3_403_r_bbox $ice3_403_r_types
-extract_wagons $ice3_406_offset $ice3_406_bbox $ice3_406_types
-extract_wagons $ice3_406_r_offset $ice3_406_r_bbox $ice3_406_r_types
-extract_wagons $ice3_407_offset $ice3_407_bbox $ice3_407_types
-extract_wagons $ice4_offset $ice4_bbox $ice4_types
-extract_wagons $ic1_offset $ic1_bbox $ic1_types
+extract_wagons_heuristic $ice1_offset $ice1_types
+extract_wagons_heuristic $ice2_offset $ice2_types
+extract_wagons_heuristic $ice3_403_1_offset $ice3_403_1_types
+extract_wagons_heuristic $ice3_403_2_offset $ice3_403_2_types
+extract_wagons_heuristic $ice3_403_r_offset $ice3_403_r_types
+extract_wagons_heuristic $ice3_406_offset $ice3_406_types
+extract_wagons_heuristic $ice3_406_r_offset $ice3_406_r_types
+extract_wagons_heuristic $ice3_407_offset $ice3_407_types
+extract_wagons_heuristic $ice4_offset $ice4_types
+extract_wagons_heuristic $icet_411_s1_offset $icet_411_s1_types
+extract_wagons_heuristic $icet_411_s2_offset $icet_411_s2_types
+extract_wagons_heuristic $icet_415_offset $icet_415_types
+extract_wagons_heuristic $ic1_offset $ic1_types
extract_wagons $ic2_bt_offset $ic2_bt_bbox $ic2_bt_types
extract_wagons $ic2_sk_offset $ic2_sk_bbox $ic2_sk_types
diff --git a/lib/export-carriage.py b/lib/export-carriage.py
new file mode 100755
index 0000000..69e5133
--- /dev/null
+++ b/lib/export-carriage.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import subprocess
+import sys
+
+
class SVGObject:
    """Bounding box of a single SVG object, identified by its element ID.

    Built from a five-element sequence ``[ID, X, Y, W, H]``; the numeric
    fields may be strings and are converted to float. ``x_`` and ``y_`` hold
    the far edges of the box (``x + w`` and ``y + h``).
    """

    _REPR_TEMPLATE = "{}<x={}, y={}, w={}, h={}>"

    def __init__(self, attrlist):
        ident = attrlist[0]
        left = float(attrlist[1])
        top = float(attrlist[2])
        width = float(attrlist[3])
        height = float(attrlist[4])

        self.id = ident
        self.x = left
        self.y = top
        self.w = width
        self.h = height
        # Far edges, precomputed so callers can compare against crop limits.
        self.x_ = left + width
        self.y_ = top + height

    def _id_has_prefix(self, prefix):
        """Return True if the object ID begins with *prefix*."""
        return self.id.startswith(prefix)

    def is_path(self):
        """Return True if the object ID starts with "path"."""
        return self._id_has_prefix("path")

    def is_tspan(self):
        """Return True if the object ID starts with "tspan"."""
        return self._id_has_prefix("tspan")

    def __repr__(self):
        fields = (self.id, self.x, self.y, self.w, self.h)
        return self._REPR_TEMPLATE.format(*fields)
+
+
def _parse_objects(query_output):
    """Turn the text printed by ``inkscape --query-all`` into SVGObjects.

    Every non-empty line is split on commas into ``[ID, X, Y, W, H]``.
    Blank lines (such as the one after the final newline) are skipped.
    """
    return [
        SVGObject(line.split(","))
        for line in query_output.splitlines()
        if line.strip()
    ]


def _vertical_crop_range(objects):
    """Return ``(y_min, y_max)`` of the horizontal band to keep.

    Heuristic: paths that are between 2 and 4 units tall and more than 10
    units wide are treated as markers. Integer rows on which more than two
    markers start are candidates; the band reaches from one unit above the
    first candidate row to four units below the last one.
    NOTE(review): what these markers are in the drawing is not visible here;
    the thresholds are taken over unchanged.

    Raises:
        ValueError: if no row collects more than two markers.
    """
    marker_ys = [
        o.y for o in objects if o.is_path() and 2 < o.h < 4 and o.w > 10
    ]

    # Coordinates are floats; floor them to integer rows before counting.
    # np.unique copes with negative rows, which np.bincount would reject.
    rows, counts = np.unique(
        np.floor(marker_ys).astype(int), return_counts=True
    )
    candidates = rows[counts > 2]

    if candidates.size == 0:
        raise ValueError("no candidate rows found for the vertical crop range")

    return int(candidates.min()) - 1, int(candidates.max()) + 4


def main(infile, outfile):
    """Crop *infile* to a heuristically detected region and export it.

    Inkscape is asked for the bounding boxes of all objects in *infile*. A
    vertical band is derived from them (see ``_vertical_crop_range``); paths
    and tspans that are not fully inside the band are deleted, and the
    horizontal extent of the remaining paths defines the export area. The
    result is written to *outfile* at 600 dpi.

    Args:
        infile: SVG file to analyse and export.
        outfile: Destination file handed to ``inkscape -o``.

    Raises:
        subprocess.CalledProcessError: if either Inkscape call fails.
        ValueError: if the heuristic finds no crop band or no path inside it.
    """
    ret = subprocess.run(
        ["inkscape", "--query-all", infile], capture_output=True, check=True
    )

    # UTF-8 is a superset of ASCII, so plain-ASCII output decodes as before.
    objects = _parse_objects(ret.stdout.decode("utf-8"))

    try:
        crop_y_min, crop_y_max = _vertical_crop_range(objects)
    except ValueError as err:
        raise ValueError(f"{infile}: {err}") from err

    objects_to_delete = []
    x_positions = []

    for o in objects:
        inside_band = o.y >= crop_y_min and o.y_ <= crop_y_max
        if not inside_band:
            # Only paths and text spans are removed; other objects (for
            # example containers) stay so that their children survive.
            if o.is_path() or o.is_tspan():
                objects_to_delete.append(o.id)
        elif o.is_path():
            x_positions.append(o.x)
            x_positions.append(o.x_)

    if not x_positions:
        raise ValueError(f"{infile}: no path lies inside the crop range")

    crop_x_min = int(min(x_positions))
    crop_x_max = int(max(x_positions)) + 1

    # presumably xvfb-run is needed because verbs require a display -- TODO confirm
    command = [
        "xvfb-run",
        "inkscape",
        f"--export-area={crop_x_min}:{crop_y_min}:{crop_x_max}:{crop_y_max}",
    ]
    if objects_to_delete:
        # Skip selection and deletion entirely rather than pass an empty list.
        command.append("--select=" + ",".join(objects_to_delete))
        command.append("--verb=EditDelete")
    command.extend(
        [
            "--batch-process",
            "--export-dpi=600",
            "-o",
            outfile,
            infile,
        ]
    )

    # check=True so that a failed export is reported, like the query above.
    subprocess.run(command, check=True)
+
+
if __name__ == "__main__":
    # Exactly two arguments are required: the SVG to analyse and the file to
    # export. Fail with a usage hint instead of a TypeError from main().
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} <infile> <outfile>")
    main(*sys.argv[1:])