BikeNetKit · Manuel-Knepper · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/FixBikeMVP.ipynb b/FixBikeMVP.ipynb
@@ -15,17 +15,14 @@
    "id": "1b6a69660258f567",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:44:35.560984Z",
-     "start_time": "2026-03-05T14:44:34.251482Z"
+     "end_time": "2026-03-06T13:24:09.401596Z",
+     "start_time": "2026-03-06T13:24:08.014186Z"
     }
    },
    "source": [
     "# import packages\n",
     "import pandas as pd\n",
     "import osmnx as ox\n",
-    "import networkx as nx\n",
-    "import random\n",
-    "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
     "import os\n",
     "\n",
@@ -48,8 +45,8 @@
    "id": "9173dd015d3280f3",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:44:43.976210Z",
-     "start_time": "2026-03-05T14:44:43.954651Z"
+     "end_time": "2026-03-06T13:24:10.609718Z",
+     "start_time": "2026-03-06T13:24:10.596152Z"
     }
    },
    "source": [
@@ -79,8 +76,8 @@
    "id": "a9a51035deb92425",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:45:07.394672Z",
-     "start_time": "2026-03-05T14:44:45.668432Z"
+     "end_time": "2026-03-06T13:24:34.025385Z",
+     "start_time": "2026-03-06T13:24:12.224519Z"
     }
    },
    "source": [
@@ -101,8 +98,8 @@
    "id": "a8b4eed5",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:45:15.988833Z",
-     "start_time": "2026-03-05T14:45:13.924067Z"
+     "end_time": "2026-03-06T13:24:46.446529Z",
+     "start_time": "2026-03-06T13:24:44.423659Z"
     }
    },
    "source": [
@@ -137,8 +134,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:45:22.101582Z",
-     "start_time": "2026-03-05T14:45:21.982420Z"
+     "end_time": "2026-03-06T13:24:47.841310Z",
+     "start_time": "2026-03-06T13:24:47.728797Z"
     }
    },
    "cell_type": "code",
@@ -164,8 +161,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:47:06.726898Z",
-     "start_time": "2026-03-05T14:45:23.501207Z"
+     "end_time": "2026-03-06T13:26:35.471171Z",
+     "start_time": "2026-03-06T13:24:49.019530Z"
     }
    },
    "cell_type": "code",
@@ -179,8 +176,8 @@
    "id": "e2670fe4",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:47:09.810662Z",
-     "start_time": "2026-03-05T14:47:09.732303Z"
+     "end_time": "2026-03-06T13:31:14.903844Z",
+     "start_time": "2026-03-06T13:31:14.725758Z"
     }
    },
    "source": [
@@ -222,8 +219,8 @@
    "id": "ac84d5bc",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:47:11.808025Z",
-     "start_time": "2026-03-05T14:47:11.457935Z"
+     "end_time": "2026-03-06T13:31:16.805584Z",
+     "start_time": "2026-03-06T13:31:16.458067Z"
     }
    },
    "source": [
@@ -248,8 +245,8 @@
    "id": "c6bc1fa5",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:47:13.884562Z",
-     "start_time": "2026-03-05T14:47:13.788483Z"
+     "end_time": "2026-03-06T13:31:18.314233Z",
+     "start_time": "2026-03-06T13:31:18.226835Z"
     }
    },
    "source": "G = weigh_edges(G, penalty)",
@@ -271,8 +268,8 @@
    "id": "602e50f5",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-03-05T14:47:22.674995Z",
-     "start_time": "2026-03-05T14:47:22.515390Z"
+     "end_time": "2026-03-06T13:31:20.049312Z",
+     "start_time": "2026-03-06T13:31:19.891469Z"
     }
    },
    "source": [
@@ -288,7 +285,7 @@
      ]
     }
    ],
-   "execution_count": 11
+   "execution_count": 10
   },
   {
    "cell_type": "markdown",
@@ -297,39 +294,32 @@
    "source": [
     "### Create list of all potential gaps\n",
     "\n",
-    "Here defined as: contact node pair combinations **within `maxgap` euclidean distance from each other** "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a02eeca1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "potential_gaps = []\n",
-    "\n",
-    "for node in contact_nodes:\n",
-    "    node_buffer = nodes_gdf.loc[node, \"geometry\"].buffer(maxgap)\n",
-    "    q = nodes_gdf.sindex.query(node_buffer, predicate=\"intersects\")\n",
-    "    neighbours = list(nodes_gdf.iloc[q].index)\n",
-    "    # convention: sort by ascending OSMID...\n",
-    "    node_pairs = [tuple(sorted(z)) for z in zip([node]*len(neighbours), neighbours)]\n",
-    "    potential_gaps += node_pairs\n",
-    "\n",
-    "# ... so that we can easily deduplicate\n",
-    "potential_gaps = list(set(potential_gaps))"
+    "Here defined as: contact node pair combinations **within `maxgap` Euclidean distance from each other**"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a01da088",
-   "metadata": {},
-   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-06T13:31:24.244710Z",
+     "start_time": "2026-03-06T13:31:21.777852Z"
+    }
+   },
    "source": [
+    "potential_gaps = find_potential_gaps(contact_nodes, nodes_gdf, maxgap)\n",
     "print(\"potential gaps found:\", len(potential_gaps))"
-   ]
+   ],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "potential gaps found: 142482\n"
+     ]
+    }
+   ],
+   "execution_count": 11
   },
   {
    "cell_type": "markdown",
@@ -345,35 +335,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "43c578c3",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-06T13:31:31.246886Z",
+     "start_time": "2026-03-06T13:31:26.417817Z"
+    }
+   },
+   "source": "found_gaps, found_gaps_nsp = find_actual_gaps(G, potential_gaps)",
    "outputs": [],
-   "source": [
-    "pbi_dict = nx.get_edge_attributes(G, \"pbi\")\n",
-    "\n",
-    "found_gaps = []\n",
-    "found_gaps_nsp = [] # naive shortest paths (by length, in node list format)\n",
-    "\n",
-    "for i, gap in enumerate(potential_gaps):\n",
-    "    u, v = gap\n",
-    "    nodelist = nx.shortest_path(\n",
-    "        G=G,\n",
-    "        source=u,\n",
-    "        target=v, \n",
-    "        weight=\"length\"\n",
-    "    )\n",
-    "    pbis = set([pbi_dict[tuple(sorted(z))] for z in zip(nodelist, nodelist[1:])])\n",
-    "    \n",
-    "    # confirm that it is an actual gap if it consists only of pbi==0 infra:\n",
-    "    if pbis==set([0]): \n",
-    "        found_gaps.append(gap)\n",
-    "        found_gaps_nsp.append(nodelist)\n",
-    "    \n",
-    "    # # (manual timer)\n",
-    "    # if i % 100000 == 0:\n",
-    "    #     print(i)\n"
-   ]
+   "execution_count": 12
   },
   {
    "cell_type": "markdown",
@@ -385,17 +356,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "b5ca9fdf440935d8",
-   "metadata": {},
-   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-06T13:31:33.093955Z",
+     "start_time": "2026-03-06T13:31:33.006586Z"
+    }
+   },
    "source": [
     "edgelist = []\n",
     "for nodelist in found_gaps_nsp:\n",
     "    edgelist += [tuple(sorted(z)) for z in zip(nodelist, nodelist[1:])]\n",
     "# deduplicate\n",
     "edgelist = list(set(edgelist))"
-   ]
+   ],
+   "outputs": [],
+   "execution_count": 13
   },
   {
    "cell_type": "markdown",
@@ -407,38 +383,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "b1f0c683",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-06T13:32:08.385205Z",
+     "start_time": "2026-03-06T13:31:34.994480Z"
+    }
+   },
+   "source": "ebc = compute_local_betweenness_centrality(G, nodes_gdf, radius)",
    "outputs": [],
-   "source": [
-    "# set current ebc value of all G edges to 0\n",
-    "for edge in G.edges:\n",
-    "    G.edges[edge][\"ebc\"] = 0\n",
-    "\n",
-    "# create dict that will be updated at each step\n",
-    "ebc = nx.get_edge_attributes(G, \"ebc\")\n",
-    "\n",
-    "# for each node, compute \"local\" ebc (buffered with radius!)\n",
-    "# for comp feas, now only subset of randomly drawn 100 nodes\n",
-    "random.seed(1312)\n",
-    "random_nodes = random.choices(list(G.nodes), k = 100)\n",
-    "for node in random_nodes:\n",
-    "    node_buffer = nodes_gdf.loc[node, \"geometry\"].buffer(radius)\n",
-    "    q = nodes_gdf.sindex.query(node_buffer, predicate=\"intersects\")\n",
-    "    neighbours = list(nodes_gdf.iloc[q].index)\n",
-    "    local_ebc = nx.edge_betweenness_centrality_subset(\n",
-    "        G=G,\n",
-    "        sources=[node],\n",
-    "        targets=neighbours,\n",
-    "        normalized=False, # important! otherwise the addition makes no sense\n",
-    "        weight=\"weight\"# using penalty for non-pbi\n",
-    "    )\n",
-    "\n",
-    "    # update ebc dictionary\n",
-    "    for k, v in local_ebc.items():\n",
-    "        ebc[k] += v # updating ebc!!"
-   ]
+   "execution_count": 14
   },
   {
    "cell_type": "markdown",
@@ -450,19 +404,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "4c920f49e57db5c",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-06T13:32:13.235834Z",
+     "start_time": "2026-03-06T13:32:12.972818Z"
+    }
+   },
+   "source": "Bs = rank_gaps_by_b(found_gaps_nsp, G, ebc)",
    "outputs": [],
-   "source": [
-    "Bs = []\n",
-    "for nodelist in found_gaps_nsp:\n",
-    "    edgelist = [tuple(sorted(z)) for z in zip(nodelist, nodelist[1:])]\n",
-    "    lengths = np.array([G.edges[edge][\"length\"] for edge in edgelist])\n",
-    "    ebcs = np.array([ebc[edge] for edge in edgelist])\n",
-    "    B = sum(lengths * ebcs)/sum(lengths)\n",
-    "    Bs.append(B)"
-   ]
+   "execution_count": 15
   },
   {
    "cell_type": "markdown",