#!/usr/bin/env python3 # vim:tabstop=4 softtabstop=4 shiftwidth=4 textwidth=160 smarttab expandtab colorcolumn=160 # # Copyright (C) 2021 Daniel Friesel # # SPDX-License-Identifier: BSD-2-Clause from datetime import datetime, timedelta from geopy.distance import distance from progress.bar import Bar import csv import json import numpy as np import os import psycopg2 import psycopg2.extras import requests import sys class ProgressBar(Bar): sma_window = 500 suffix = "%(percent).0f%% [%(elapsed_td)s/%(eta_td)s]" conn = psycopg2.connect( dbname=os.getenv("GEOLOOKUP_DBNAME", "geo_to_stations"), user=os.getenv("GEOLOOKUP_DBUSER", "geo_to_stations"), password=os.getenv("GEOLOOKUP_DBPASS"), host=os.getenv("GEOLOOKUP_DBHOST", "localhost"), ) shape = dict() stops_by_latlon = dict() routes_by_shape_id = dict() trips_by_shape_id = dict() name_to_eva = dict() eva_to_name = dict() # lon + 0.003 ≙ 190 to 220 m # lat + 0.003 ≙ 333 m lut_grid_step = 100 try: with open("data/iris-stations.json", "r") as f: for station in json.load(f): name_to_eva[station["name"]] = int(station["eva"]) eva_to_name[int(station["eva"])] = station["name"] except FileNotFoundError: print( "populate-lut requires a list of IRIS stations. Please run the following commands:" ) print() print("mkdir -p data") print( "curl https://git.finalrewind.org/Travel-Status-DE-IRIS/plain/share/stations.json > data/iris-stations.json" ) print() sys.exit(1) try: with open("data/nvbw/trips.txt", "r") as f: pass with open("data/nvbw/shapes.txt", "r") as f: pass with open("data/nvbw/stop_times.txt", "r") as f: pass except FileNotFoundError: print("populate-lut requires GTFS shapes of regional transit lines.") print( "At present, the best known resource is ." ) print( "(https://www.nvbw.de/fileadmin/user_upload/service/open_data/fahrplandaten_mit_liniennetz/bwspnv.zip)" ) print("Please download and extract it to data/nvbw.") sys.exit(1) print("Loading trips ...") with open("data/nvbw/trips.txt", "r") as f: f.readline() cr = csv.reader(f) for row in cr: route_id, trip_id, service_id, direction_id, block_id, shape_id = row if shape_id not in routes_by_shape_id: routes_by_shape_id[shape_id] = list() routes_by_shape_id[shape_id].append(route_id) if shape_id not in trips_by_shape_id: trips_by_shape_id[shape_id] = list() trips_by_shape_id[shape_id].append(trip_id) print("Loading stop_times ...") stops_by_tripid = dict() with open("data/nvbw/stop_times.txt", "r") as f: f.readline() cr = csv.reader(f) for row in cr: ( trip_id, stop_id, arrival_time, departure_time, stop_seq, stop_headsign, pickup_type, dropoff_type, dist, ) = row if trip_id not in stops_by_tripid: stops_by_tripid[trip_id] = list() stops_by_tripid[trip_id].append((stop_headsign, float(dist))) print("Loading shapes ...") with open("data/nvbw/shapes.txt", "r") as f: f.readline() cr = csv.reader(f) prev_lat, prev_lon = None, None prev_dist = 0 for row in cr: shape_id, _, lat, lon, dist = row if shape_id not in shape: shape[shape_id] = list() prev_dist = 0 lat = float(lat) lon = float(lon) dist = float(dist) if dist > prev_dist and dist - prev_dist > lut_grid_step: # ensure shape entries are no more than lut_grid_step meters apart for i in np.arange(lut_grid_step, dist - prev_dist, lut_grid_step): ratio = i / (dist - prev_dist) assert 0 <= ratio <= 1 rel_lat = prev_lat * ratio + lat * (1 - ratio) rel_lon = prev_lon * ratio + lon * (1 - ratio) shape[shape_id].append((rel_lat, rel_lon, dist)) shape[shape_id].append((lat, lon, dist)) prev_dist = dist prev_lat = lat prev_lon = lon def add_stops(lat, lon, stops): evas = list() for stop in stops: try: evas.append(name_to_eva[stop]) except KeyError: try: evas.append(name_to_eva[stop.replace(" (", "(")]) except KeyError: pass add_evas(lat, lon, evas) def add_evas(lat, lon, evas): lut_lat_center = round(lat * 1000) lut_lon_center = round(lon * 1000) for lut_lat in range(lut_lat_center - 0, lut_lat_center + 1): for lut_lon in range(lut_lon_center - 0, lut_lon_center + 1): if (lut_lat, lut_lon) not in stops_by_latlon: stops_by_latlon[(lut_lat, lut_lon)] = set() stops_by_latlon[(lut_lat, lut_lon)].update(evas) # Here be dragons. I don't recall what this code does. It shouldn't be too complicated, though. num_shapes = len(shape.keys()) for shape_id in ProgressBar("Calculating neighoubrs", max=num_shapes).iter( shape.keys() ): for trip_id in trips_by_shape_id[shape_id]: stops = stops_by_tripid[trip_id] first_stop = stops[0] last_stop = stops[-1] for lat, lon, shape_dist in shape[shape_id]: assert first_stop[1] <= shape_dist <= last_stop[1] for i, (stop_name, stop_dist) in enumerate(stops): if ( stop_dist <= shape_dist and i + 1 < len(stops) and stops[i + 1][1] >= shape_dist ): add_stops(lat, lon, (stop_name, stops[i + 1][0])) try: class Polyline: def __init__(self, json_data): self.coordinates = json_data["polyline"] with open("data/polydump.json", "r") as f: polylines = list(map(Polyline, json.load(f))) def add_leg(coordinates, from_eva, to_eva): for lat, lon in coordinates: add_evas(lat, lon, (from_eva, to_eva)) for polyline in ProgressBar("Adding polydump data", max=len(polylines)).iter( polylines ): prev_eva = None leg = list() for coord in polyline.coordinates: lat = coord[1] lon = coord[0] if leg: prev_lat = leg[-1][0] prev_lon = leg[-1][1] prev_dist = distance((prev_lat, prev_lon), (lat, lon)).m for i in np.arange(lut_grid_step, prev_dist, lut_grid_step): ratio = i / prev_dist assert 0 <= ratio <= 1 rel_lat = prev_lat * ratio + lat * (1 - ratio) rel_lon = prev_lon * ratio + lon * (1 - ratio) leg.append((rel_lat, rel_lon)) leg.append((lat, lon)) if len(coord) > 2 and coord[2] != prev_eva: if prev_eva: add_leg(leg, prev_eva, coord[2]) prev_eva = coord[2] leg = list() except FileNotFoundError: pass num_latlons = len(stops_by_latlon.keys()) with conn.cursor() as cur: cur.execute("drop table if exists stations") cur.execute( """create table stations ( lat integer not null, lon integer not null, stations jsonb not null, primary key (lat, lon) ) """ ) insert_groups = list() insert_group = list() for (lat, lon), stops in stops_by_latlon.items(): insert_group.append((lat, lon, json.dumps(list(stops)))) if len(insert_group) >= 100: insert_groups.append(insert_group) insert_group = list() insert_groups.append(insert_group) for insert_group in ProgressBar("Inserting coordinates", max=len(insert_groups)).iter( insert_groups ): with conn.cursor() as cur: psycopg2.extras.execute_values( cur, """insert into stations (lat, lon, stations) values %s""", insert_group ) conn.commit()