From d18752205d0a9558332f97dd9cfdf96437854d04 Mon Sep 17 00:00:00 2001 From: Abhinav Chaudhary Date: Wed, 13 May 2026 02:15:20 +0530 Subject: [PATCH 1/6] Create README.md --- .../abhinav-chaudhary/level6/README.md | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 submissions/abhinav-chaudhary/level6/README.md diff --git a/submissions/abhinav-chaudhary/level6/README.md b/submissions/abhinav-chaudhary/level6/README.md new file mode 100644 index 000000000..771035e7b --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/README.md @@ -0,0 +1,162 @@ +# Factory Knowledge Graph Dashboard + +A graph-based production dashboard for a Swedish steel fabrication factory. +Data from three CSVs is loaded into Neo4j as a knowledge graph, then explored through a Streamlit app with Plotly charts. + +Built for the Level 6 factory graph challenge focused on Neo4j and Streamlit. + +--- + +## What it does + +The factory runs 8 projects across 9 production stations with 13 workers. +Raw CSV data (production runs, worker assignments, weekly capacity) is converted into a Neo4j graph with 9 node labels and 12 relationship types. + +The dashboard has five pages: + +|Page|What it shows| +|-|-| +|**Project Overview**|Variance heatmap — which projects are ahead or behind plan, by week| +|**Station Load**|Planned vs actual hours per station, overrun highlighting| +|**Capacity Tracker**|Weekly capacity breakdown (own / hired / overtime) vs demand| +|**Worker Coverage**|Which workers can cover each station, cert gaps, full roster| +|**Self-Test**|Schema validation — connection, node counts, rel counts, edge properties| + +--- + +## Tech stack + +* **Python 3.11+** +* **Neo4j 5.x** — graph database +* **Streamlit** — dashboard UI +* **Plotly** — interactive charts +* **Pandas** — dataframe wrangling +* **python-dotenv** — local credential management + +--- + +## Project structure + +``` +. 
+├── app.py # Streamlit dashboard +├── seed_graph.py # Loads CSVs → Neo4j graph +├── factory_production.csv # Production runs data +├── factory_workers.csv # Worker & certification data +├── factory_capacity.csv # Weekly capacity snapshots +├── schema.md # Graph schema (Mermaid diagram + reference tables) +├── answers.md # Level 5/6 analysis write-up +├── .env # Your credentials (not committed) +└── README.md +``` + +--- + +## Setup + +### 1. Clone and install dependencies + +```bash +git clone <your-repo-url> +cd <repo-folder> + +python -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate + +pip install streamlit neo4j pandas plotly python-dotenv +``` + +### 2. Start Neo4j + +You can use [Neo4j Desktop](https://neo4j.com/download/), [AuraDB](https://neo4j.com/cloud/aura-free/) (free tier), or Docker: + +```bash +docker run \ + --name neo4j-factory \ + -p 7474:7474 -p 7687:7687 \ + -e NEO4J_AUTH=neo4j/yourpassword \ + neo4j:5 +``` + +### 3. Create your `.env` + +Create a `.env` file in the project root: + +```env +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=yourpassword +``` + +> For AuraDB, use the `neo4j+s://` URI from your instance dashboard. + +### 4. Seed the graph + +This loads all three CSVs into Neo4j. Run it once (or re-run after a `MATCH (n) DETACH DELETE n` to reset): + +```bash +python seed_graph.py +``` + +### 5. Run the dashboard + +```bash +streamlit run app.py +``` + +Open [http://localhost:8501](http://localhost:8501) in your browser. + +--- + +## Streamlit Cloud deployment + +1. Push the repo to GitHub (make sure `.env` is in `.gitignore`). +2. Go to [share.streamlit.io](https://share.streamlit.io) and connect the repo. +3. Add your credentials under **Settings → Secrets**: + +```toml +NEO4J_URI = "neo4j+s://xxxx.databases.neo4j.io" +NEO4J_USER = "neo4j" +NEO4J_PASSWORD = "yourpassword" +``` + +The app reads `st.secrets` first, so no other changes are needed. 
+ +--- + +## Graph schema (quick reference) + +**9 node labels:** `Project` · `ProductionEntry` · `Station` · `Product` · `Worker` · `Week` · `CapacitySnapshot` · `Certification` · `BOP` + +**12 relationship types:** `HAS_RUN` · `USES_PRODUCT` · `PROCESSED_AT` · `SCHEDULED_IN` · `REQUIRES_STATION` · `STRUCTURED_BY` · `PRIMARILY_AT` · `CAN_COVER` · `WORKED_ON` · `HOLDS` · `REQUIRES_CERT` · `HAS_SNAPSHOT` + +Two relationships carry data properties: + +* `PROCESSED_AT` → `planned_hours`, `actual_hours`, `completed_units` +* `SCHEDULED_IN` → `planned_hours`, `actual_hours` + +See `schema.md` for the full Mermaid diagram and relationship reference table. + +--- + +## Running the self-test + +Navigate to the **Self-Test** page in the sidebar and click **▶ Run All Tests**. + +It checks: + +1. Neo4j connection +2. All 9 node labels present +3. All 12 relationship types present +4. Node count ≥ 50 +5. Relationship count ≥ 100 +6. Variance query returns results from `PROCESSED_AT` edge properties + +--- + +## Notes + +* Data is cached for 5 minutes (`@st.cache_data(ttl=300)`) — to force a reload sooner, use **Clear cache** in the app menu or restart Streamlit (a browser refresh alone reuses the cached data). +* All dashboard queries run directly against Neo4j after the graph is seeded. +* `seed_graph.py` is idempotent — it uses `MERGE` throughout, so re-running won't duplicate data. 
+ From 673754f5248ce79762328e416e2ce507b248aa0d Mon Sep 17 00:00:00 2001 From: Abhinav Chaudhary Date: Wed, 13 May 2026 02:16:59 +0530 Subject: [PATCH 2/6] Add files via upload --- .../level6/DASHBOARD_URL.txt | 0 submissions/abhinav-chaudhary/level6/app.py | 1166 +++++++++++++++++ .../level6/factory_capacity.csv | 9 + .../level6/factory_production.csv | 69 + .../level6/factory_workers.csv | 15 + .../abhinav-chaudhary/level6/requirements.txt | 5 + .../abhinav-chaudhary/level6/seed_graph.py | 717 ++++++++++ 7 files changed, 1981 insertions(+) create mode 100644 submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt create mode 100644 submissions/abhinav-chaudhary/level6/app.py create mode 100644 submissions/abhinav-chaudhary/level6/factory_capacity.csv create mode 100644 submissions/abhinav-chaudhary/level6/factory_production.csv create mode 100644 submissions/abhinav-chaudhary/level6/factory_workers.csv create mode 100644 submissions/abhinav-chaudhary/level6/requirements.txt create mode 100644 submissions/abhinav-chaudhary/level6/seed_graph.py diff --git a/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt b/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt new file mode 100644 index 000000000..e69de29bb diff --git a/submissions/abhinav-chaudhary/level6/app.py b/submissions/abhinav-chaudhary/level6/app.py new file mode 100644 index 000000000..c37a2ef2f --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/app.py @@ -0,0 +1,1166 @@ +""" +app.py — Level 6 Factory Knowledge Graph Dashboard +==================================================== +Swedish steel fabrication factory · Neo4j 5.x · Streamlit · Plotly + +Pages +----- +1. Project Overview — project timeline heatmap (variance % by week) +2. Station Load — planned vs actual hours, overrun highlighting +3. Capacity Tracker — weekly capacity breakdown + deficit line +4. Worker Coverage — coverage matrix with cert gaps +5. 
Self-Test — full schema validation suite + +Connection +---------- +Reads credentials from (in priority order): + 1. Streamlit Cloud secrets → st.secrets["NEO4J_URI"] etc. + 2. Local .env → NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD + +Usage +----- + pip install streamlit neo4j pandas plotly python-dotenv + streamlit run app.py +""" + +# ───────────────────────────────────────────────────────────────────────────── +# Imports +# ───────────────────────────────────────────────────────────────────────────── +import os +import textwrap +import traceback + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import streamlit as st +from neo4j import GraphDatabase, exceptions as neo4j_exc + +# Load .env for local development (no-op if file absent or already set) +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass # python-dotenv not required on Streamlit Cloud + +# ───────────────────────────────────────────────────────────────────────────── +# Page config — must be first Streamlit call +# ───────────────────────────────────────────────────────────────────────────── +st.set_page_config( + page_title="Factory Graph Dashboard", + page_icon="🏗️", + layout="wide", + initial_sidebar_state="expanded", +) + +# ───────────────────────────────────────────────────────────────────────────── +# Global CSS — clean modern style +# ───────────────────────────────────────────────────────────────────────────── +st.markdown( + """ + + """, + unsafe_allow_html=True, +) + +# ───────────────────────────────────────────────────────────────────────────── +# Neo4j connection helper +# ───────────────────────────────────────────────────────────────────────────── + +def _get_credentials() -> tuple[str, str, str]: + """ + Resolve Neo4j credentials. + + Priority: + 1. Streamlit Cloud secrets (st.secrets) + 2. 
Environment variables (set by .env or shell) + """ + try: + uri = st.secrets["NEO4J_URI"] + user = st.secrets["NEO4J_USER"] + pw = st.secrets["NEO4J_PASSWORD"] + return uri, user, pw + except (KeyError, FileNotFoundError): + pass + + uri = os.getenv("NEO4J_URI", "bolt://localhost:7687") + user = os.getenv("NEO4J_USER", "neo4j") + pw = os.getenv("NEO4J_PASSWORD", "") + return uri, user, pw + + +@st.cache_resource(show_spinner="Connecting to Neo4j …") +def get_driver(): + """Return a cached Neo4j driver. Raises on auth failure.""" + uri, user, pw = _get_credentials() + driver = GraphDatabase.driver(uri, auth=(user, pw)) + driver.verify_connectivity() + return driver + + +def run_query(cypher: str, params: dict | None = None) -> list[dict]: + """Execute a Cypher read query and return rows as list-of-dicts.""" + driver = get_driver() + with driver.session() as session: + result = session.run(cypher, params or {}) + return [dict(record) for record in result] + + +# ───────────────────────────────────────────────────────────────────────────── +# Cached data loaders (TTL = 5 min — avoid hammering DB on every widget tick) +# ───────────────────────────────────────────────────────────────────────────── + +@st.cache_data(ttl=300, show_spinner=False) +def load_project_overview() -> pd.DataFrame: + """ + Project × Week variance heatmap data. 
+ variance_pct = round(100*(actual-planned)/planned, 1) + """ + cypher = """ + MATCH (proj:Project)-[:HAS_RUN]->(entry:ProductionEntry)-[r:PROCESSED_AT]->(s:Station) + MATCH (entry)-[:SCHEDULED_IN]->(w:Week) + WITH proj, w, + sum(r.planned_hours) AS planned, + sum(r.actual_hours) AS actual + RETURN + proj.project_id AS project_id, + proj.project_name AS project, + proj.project_number AS project_number, + w.week_id AS week, + round(planned, 1) AS planned_hours, + round(actual, 1) AS actual_hours, + round(100.0 * (actual - planned) / planned, 1) AS variance_pct + ORDER BY proj.project_id, w.week_id + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_project_summary() -> pd.DataFrame: + """Per-project total planned vs actual hours and entry count.""" + cypher = """ + MATCH (proj:Project)-[:HAS_RUN]->(entry:ProductionEntry)-[r:PROCESSED_AT]->(s:Station) + WITH proj, + count(DISTINCT entry) AS runs, + round(sum(r.planned_hours), 1) AS total_planned, + round(sum(r.actual_hours), 1) AS total_actual + RETURN + proj.project_number AS project_number, + proj.project_name AS project_name, + proj.etapp AS etapp, + runs, + total_planned, + total_actual, + round(100.0 * (total_actual - total_planned) / total_planned, 1) AS variance_pct + ORDER BY proj.project_id + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_station_load() -> pd.DataFrame: + """Station × Week: planned vs actual hours with delta.""" + cypher = """ + MATCH (entry:ProductionEntry)-[r:PROCESSED_AT]->(s:Station) + MATCH (entry)-[:SCHEDULED_IN]->(w:Week) + RETURN + s.station_code AS station_code, + s.station_name AS station_name, + w.week_id AS week, + round(sum(r.planned_hours), 1) AS planned_hours, + round(sum(r.actual_hours), 1) AS actual_hours, + round(sum(r.actual_hours) - sum(r.planned_hours), 1) AS delta, + sum(r.completed_units) AS completed_units + ORDER BY w.week_id, 
s.station_code + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_overrun_entries() -> pd.DataFrame: + """All entries where actual > planned * 1.10 (>10% overrun).""" + cypher = """ + MATCH (proj:Project)-[:HAS_RUN]->(entry:ProductionEntry)-[r:PROCESSED_AT]->(s:Station) + MATCH (entry)-[:SCHEDULED_IN]->(w:Week) + WHERE r.actual_hours > r.planned_hours * 1.10 + WITH s, proj, entry, r, w, + round(100.0 * (r.actual_hours - r.planned_hours) / r.planned_hours, 1) AS variance_pct + RETURN + s.station_code AS station_code, + s.station_name AS station_name, + proj.project_name AS project, + w.week_id AS week, + r.planned_hours AS planned_hours, + r.actual_hours AS actual_hours, + variance_pct + ORDER BY variance_pct DESC + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_capacity() -> pd.DataFrame: + """Weekly capacity snapshots.""" + cypher = """ + MATCH (w:Week)-[:HAS_SNAPSHOT]->(cs:CapacitySnapshot) + RETURN + w.week_id AS week, + cs.own_hours AS own_hours, + cs.hired_hours AS hired_hours, + cs.overtime_hours AS overtime_hours, + cs.total_capacity AS total_capacity, + cs.total_planned AS total_planned, + cs.deficit AS deficit + ORDER BY w.week_id + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_worker_coverage() -> pd.DataFrame: + """Per-station primary workers, backup workers, and coverage depth.""" + cypher = """ + MATCH (s:Station) + OPTIONAL MATCH (primary:Worker)-[:PRIMARILY_AT]->(s) + OPTIONAL MATCH (backup:Worker)-[:CAN_COVER]->(s) + WHERE backup IS NULL OR backup.worker_id <> primary.worker_id + RETURN + s.station_code AS station_code, + s.station_name AS station_name, + collect(DISTINCT primary.name) AS primary_workers, + collect(DISTINCT backup.name) AS backup_workers + ORDER BY s.station_code + """ + rows = run_query(cypher) + df = pd.DataFrame(rows) + 
if not df.empty: + # Remove nulls from list columns, compute coverage depth + df["primary_workers"] = df["primary_workers"].apply( + lambda lst: [x for x in lst if x] if isinstance(lst, list) else [] + ) + df["backup_workers"] = df["backup_workers"].apply( + lambda lst: [x for x in lst if x] if isinstance(lst, list) else [] + ) + df["coverage_depth"] = df["backup_workers"].apply(len) + df["primary_str"] = df["primary_workers"].apply(", ".join) + df["backup_str"] = df["backup_workers"].apply(", ".join) + return df + + +@st.cache_data(ttl=300, show_spinner=False) +def load_worker_cert_gap() -> pd.DataFrame: + """ + Stations that REQUIRE_CERT a certification for which NO worker + currently HOLDS that cert via CAN_COVER. These are coverage gaps. + """ + cypher = """ + MATCH (s:Station)-[:REQUIRES_CERT]->(c:Certification) + WHERE NOT EXISTS { + MATCH (w:Worker)-[:CAN_COVER]->(s) + MATCH (w)-[:HOLDS]->(c) + } + RETURN + s.station_code AS station_code, + s.station_name AS station_name, + c.name AS missing_cert, + c.cert_id AS cert_id + ORDER BY s.station_code + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +@st.cache_data(ttl=300, show_spinner=False) +def load_worker_list() -> pd.DataFrame: + """All workers with their primary station and cert count.""" + cypher = """ + MATCH (w:Worker) + OPTIONAL MATCH (w)-[:PRIMARILY_AT]->(s:Station) + OPTIONAL MATCH (w)-[:HOLDS]->(c:Certification) + RETURN + w.worker_id AS worker_id, + w.name AS name, + w.role AS role, + w.type AS type, + w.hours_per_week AS hours_per_week, + s.station_code AS primary_station, + s.station_name AS primary_station_name, + count(DISTINCT c) AS cert_count + ORDER BY w.worker_id + """ + rows = run_query(cypher) + return pd.DataFrame(rows) + + +# ───────────────────────────────────────────────────────────────────────────── +# Sidebar navigation +# ───────────────────────────────────────────────────────────────────────────── + +PAGES = { + "🗂️ Project Overview": "project_overview", + "🏭 
Station Load": "station_load", + "📊 Capacity Tracker": "capacity_tracker", + "👷 Worker Coverage": "worker_coverage", + "🧪 Self-Test": "self_test", +} + +with st.sidebar: + st.markdown("## 🏗️ Factory Dashboard") + st.markdown('', unsafe_allow_html=True) + selection = st.radio( + label="Page", + options=list(PAGES.keys()), + label_visibility="collapsed", + ) + st.markdown("---") + st.markdown('', unsafe_allow_html=True) + + # Show live connection status in sidebar + try: + drv = get_driver() + st.markdown('● Connected', unsafe_allow_html=True) + except Exception as e: + st.markdown('● Disconnected', unsafe_allow_html=True) + st.caption(str(e)[:120]) + + st.markdown("---") + st.caption("Neo4j 5.x · Streamlit · Plotly") + +page = PAGES[selection] + + +# ───────────────────────────────────────────────────────────────────────────── +# Helper: connection error banner +# ───────────────────────────────────────────────────────────────────────────── + +def show_connection_error(err: Exception): + st.error( + f"**Cannot reach Neo4j.** \n" + f"`{type(err).__name__}: {err}` \n\n" + "Check your `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` in `.env` or Streamlit secrets." 
+ ) + + +# ───────────────────────────────────────────────────────────────────────────── +# PAGE 1 — Project Overview +# ───────────────────────────────────────────────────────────────────────────── + +if page == "project_overview": + st.title("🗂️ Project Overview") + st.caption("Variance heatmap across all projects and weeks · Data sourced from Neo4j only") + + try: + df_summary = load_project_summary() + df_heat = load_project_overview() + + # ── KPI row ────────────────────────────────────────────────────────── + c1, c2, c3, c4 = st.columns(4) + c1.metric("Projects", len(df_summary)) + c2.metric("Total Planned h", f"{df_summary['total_planned'].sum():,.0f}") + c3.metric("Total Actual h", f"{df_summary['total_actual'].sum():,.0f}") + overall_var = ( + 100.0 + * (df_summary["total_actual"].sum() - df_summary["total_planned"].sum()) + / df_summary["total_planned"].sum() + ) + c4.metric("Overall Variance", f"{overall_var:+.1f}%") + + st.markdown("---") + + # ── Variance Heatmap ────────────────────────────────────────────────── + st.subheader("Variance % · Project × Week") + st.caption( + "Red = over plan · Green = under plan · " + "Cell value = (actual − planned) / planned × 100" + ) + + if not df_heat.empty: + pivot = df_heat.pivot_table( + index="project", columns="week", values="variance_pct", aggfunc="mean" + ) + # Sort weeks naturally (w1 < w2 … w8) + pivot = pivot.reindex(sorted(pivot.columns, key=lambda x: int(x[1:])), axis=1) + + fig_heat = go.Figure( + go.Heatmap( + z=pivot.values, + x=pivot.columns.tolist(), + y=pivot.index.tolist(), + colorscale=[ + [0.0, "#16a34a"], # dark green (most under) + [0.45, "#dcfce7"], # light green + [0.5, "#ffffff"], # white (on target) + [0.55, "#fee2e2"], # light red + [1.0, "#dc2626"], # dark red (most over) + ], + zmid=0, + text=[[f"{v:+.1f}%" if v == v else "" for v in row] for row in pivot.values], + texttemplate="%{text}", + hovertemplate="%{y}
Week: %{x}
Variance: %{z:+.1f}%", + colorbar=dict(title="Variance %"), + ) + ) + fig_heat.update_layout( + height=420, + margin=dict(l=20, r=20, t=30, b=20), + xaxis_title="Week", + yaxis_title="", + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + ) + st.plotly_chart(fig_heat, use_container_width=True) + + st.markdown("---") + + # ── Project summary table ───────────────────────────────────────────── + st.subheader("Project Summary") + + def _colour_variance(val): + if isinstance(val, float): + if val > 10: + return "color: #ef4444; font-weight:600" + if val > 0: + return "color: #f59e0b" + return "color: #22c55e" + return "" + + styled = ( + df_summary.rename(columns={ + "project_number": "Number", + "project_name": "Name", + "etapp": "Etapp", + "runs": "Runs", + "total_planned": "Planned h", + "total_actual": "Actual h", + "variance_pct": "Variance %", + }) + .style + .applymap(_colour_variance, subset=["Variance %"]) + .format({"Planned h": "{:.1f}", "Actual h": "{:.1f}", "Variance %": "{:+.1f}"}) + ) + st.dataframe(styled, use_container_width=True, hide_index=True) + + # ── Planned vs Actual bar per project ──────────────────────────────── + st.subheader("Planned vs Actual Hours per Project") + fig_bar = go.Figure() + fig_bar.add_bar( + name="Planned", + x=df_summary["project_name"], + y=df_summary["total_planned"], + marker_color="#3b82f6", + ) + fig_bar.add_bar( + name="Actual", + x=df_summary["project_name"], + y=df_summary["total_actual"], + marker_color="#f97316", + ) + fig_bar.update_layout( + barmode="group", + height=380, + margin=dict(l=20, r=20, t=30, b=80), + legend=dict(orientation="h", y=1.05), + xaxis_tickangle=-30, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + yaxis_title="Hours", + ) + st.plotly_chart(fig_bar, use_container_width=True) + + except Exception as err: + show_connection_error(err) + + +# 
───────────────────────────────────────────────────────────────────────────── +# PAGE 2 — Station Load +# ───────────────────────────────────────────────────────────────────────────── + +elif page == "station_load": + st.title("🏭 Station Load") + st.caption("Planned vs actual hours per station · Overruns highlighted") + + try: + df_load = load_station_load() + df_overrun = load_overrun_entries() + + if df_load.empty: + st.info("No station load data found.") + else: + # ── Filters ────────────────────────────────────────────────────── + col_f1, col_f2 = st.columns(2) + weeks = sorted(df_load["week"].unique(), key=lambda x: int(x[1:])) + stations = sorted(df_load["station_code"].unique()) + + sel_weeks = col_f1.multiselect("Filter Weeks", weeks, default=weeks) + sel_sta = col_f2.multiselect("Filter Stations", stations, default=stations) + + mask = df_load["week"].isin(sel_weeks) & df_load["station_code"].isin(sel_sta) + df_f = df_load[mask].copy() + + # ── KPI row ─────────────────────────────────────────────────────── + overloaded = df_f[df_f["delta"] > 0]["station_code"].nunique() + c1, c2, c3, c4 = st.columns(4) + c1.metric("Stations (filtered)", df_f["station_code"].nunique()) + c2.metric("Overloaded Stations", overloaded, + delta_color="inverse", delta=f"{overloaded} overloaded") + c3.metric("Total Planned h", f"{df_f['planned_hours'].sum():,.0f}") + c4.metric("Total Actual h", f"{df_f['actual_hours'].sum():,.0f}") + + st.markdown("---") + + # ── Grouped bar chart: planned vs actual ───────────────────────── + st.subheader("Planned vs Actual by Station & Week") + + fig_load = go.Figure() + color_map_planned = "#3b82f6" + color_map_actual = "#f97316" + + for week in sorted(sel_weeks, key=lambda x: int(x[1:])): + wdf = df_f[df_f["week"] == week] + fig_load.add_bar( + name=f"{week} Planned", + x=wdf["station_code"] + " " + wdf["station_name"].str[:12], + y=wdf["planned_hours"], + marker_color=color_map_planned, + opacity=0.7, + legendgroup=week, + 
legendgrouptitle_text=week if week == sorted(sel_weeks, key=lambda x: int(x[1:]))[0] else None, + ) + # Colour actual bars: red if delta > 0 else green + bar_colours = [ + "#ef4444" if d > 0 else "#22c55e" + for d in wdf["delta"].tolist() + ] + fig_load.add_bar( + name=f"{week} Actual", + x=wdf["station_code"] + " " + wdf["station_name"].str[:12], + y=wdf["actual_hours"], + marker_color=bar_colours, + legendgroup=week, + ) + + fig_load.update_layout( + barmode="group", + height=430, + margin=dict(l=20, r=20, t=30, b=90), + xaxis_tickangle=-35, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + yaxis_title="Hours", + legend=dict(orientation="h", y=1.05, font_size=10), + ) + st.plotly_chart(fig_load, use_container_width=True) + + # ── Delta heatmap: station × week ───────────────────────────────── + st.subheader("Delta Hours Heatmap (actual − planned)") + pivot_delta = df_f.pivot_table( + index="station_name", columns="week", values="delta", aggfunc="sum" + ) + pivot_delta = pivot_delta.reindex( + sorted(pivot_delta.columns, key=lambda x: int(x[1:])), axis=1 + ) + fig_dh = go.Figure( + go.Heatmap( + z=pivot_delta.values, + x=pivot_delta.columns.tolist(), + y=pivot_delta.index.tolist(), + colorscale=[[0, "#16a34a"], [0.5, "#ffffff"], [1, "#dc2626"]], + zmid=0, + text=[[f"{v:+.1f}" if v == v else "" for v in row] for row in pivot_delta.values], + texttemplate="%{text}", + hovertemplate="%{y}
%{x}
Δ = %{z:+.1f} h", + colorbar=dict(title="Δ Hours"), + ) + ) + fig_dh.update_layout( + height=380, + margin=dict(l=20, r=20, t=30, b=20), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + ) + st.plotly_chart(fig_dh, use_container_width=True) + + st.markdown("---") + + # ── Overrun detail table ─────────────────────────────────────────── + st.subheader("⚠️ Entries >10% Over Plan") + if df_overrun.empty: + st.success("No entries exceed 10% overrun threshold.") + else: + def _red(val): + if isinstance(val, float) and val > 20: + return "color:#ef4444;font-weight:600" + if isinstance(val, float) and val > 10: + return "color:#f59e0b" + return "" + + styled_ov = ( + df_overrun.rename(columns={ + "station_code": "Station", + "station_name": "Station Name", + "project": "Project", + "week": "Week", + "planned_hours": "Planned h", + "actual_hours": "Actual h", + "variance_pct": "Variance %", + }) + .style + .applymap(_red, subset=["Variance %"]) + .format({"Planned h": "{:.1f}", "Actual h": "{:.1f}", "Variance %": "{:+.1f}"}) + ) + st.dataframe(styled_ov, use_container_width=True, hide_index=True) + + except Exception as err: + show_connection_error(err) + + +# ───────────────────────────────────────────────────────────────────────────── +# PAGE 3 — Capacity Tracker +# ───────────────────────────────────────────────────────────────────────────── + +elif page == "capacity_tracker": + st.title("📊 Capacity Tracker") + st.caption("Weekly factory capacity vs planned demand · Deficit weeks highlighted") + + try: + df_cap = load_capacity() + + if df_cap.empty: + st.info("No capacity data found.") + else: + # ── KPI row ──────────────────────────────────────────────────────── + deficit_weeks = df_cap[df_cap["deficit"] < 0] + c1, c2, c3, c4 = st.columns(4) + c1.metric("Weeks Tracked", len(df_cap)) + c2.metric("Deficit Weeks", len(deficit_weeks)) + c3.metric("Avg Capacity h", f"{df_cap['total_capacity'].mean():,.0f}") + c4.metric("Avg 
Planned h", f"{df_cap['total_planned'].mean():,.0f}") + + st.markdown("---") + + # ── Stacked bar + planned line ──────────────────────────────────── + st.subheader("Capacity Breakdown vs Demand") + + fig_cap = go.Figure() + + # Stacked bars: own / hired / overtime + fig_cap.add_bar( + name="Own Hours", + x=df_cap["week"], + y=df_cap["own_hours"], + marker_color="#3b82f6", + ) + fig_cap.add_bar( + name="Hired Hours", + x=df_cap["week"], + y=df_cap["hired_hours"], + marker_color="#8b5cf6", + ) + fig_cap.add_bar( + name="Overtime Hours", + x=df_cap["week"], + y=df_cap["overtime_hours"], + marker_color="#f59e0b", + ) + + # Planned demand line + fig_cap.add_scatter( + name="Planned Demand", + x=df_cap["week"], + y=df_cap["total_planned"], + mode="lines+markers", + line=dict(color="#f97316", width=3, dash="dot"), + marker=dict(size=8), + ) + + # Shade deficit weeks with a vertical rectangle + for _, row in deficit_weeks.iterrows(): + fig_cap.add_vrect( + x0=row["week"], x1=row["week"], + fillcolor="rgba(239,68,68,0.12)", + layer="below", + line_width=0, + ) + + fig_cap.update_layout( + barmode="stack", + height=430, + margin=dict(l=20, r=20, t=30, b=20), + legend=dict(orientation="h", y=1.05), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + yaxis_title="Hours", + xaxis_title="Week", + ) + st.plotly_chart(fig_cap, use_container_width=True) + + # ── Deficit bar ─────────────────────────────────────────────────── + st.subheader("Deficit / Surplus per Week") + bar_cols = ["#ef4444" if d < 0 else "#22c55e" for d in df_cap["deficit"]] + fig_def = go.Figure( + go.Bar( + x=df_cap["week"], + y=df_cap["deficit"], + marker_color=bar_cols, + hovertemplate="Week: %{x}
Deficit: %{y:+.0f} h", + ) + ) + fig_def.add_hline(y=0, line_color="#888", line_dash="dash") + fig_def.update_layout( + height=280, + margin=dict(l=20, r=20, t=20, b=20), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + yaxis_title="Surplus / Deficit (h)", + ) + st.plotly_chart(fig_def, use_container_width=True) + + st.markdown("---") + + # ── Raw capacity table ──────────────────────────────────────────── + st.subheader("Raw Capacity Data") + + def _cap_colour(val): + if isinstance(val, (int, float)) and val < 0: + return "color:#ef4444;font-weight:600" + return "" + + styled_cap = ( + df_cap.rename(columns={ + "week": "Week", + "own_hours": "Own h", + "hired_hours": "Hired h", + "overtime_hours": "Overtime h", + "total_capacity": "Capacity h", + "total_planned": "Planned h", + "deficit": "Deficit h", + }) + .style + .applymap(_cap_colour, subset=["Deficit h"]) + ) + st.dataframe(styled_cap, use_container_width=True, hide_index=True) + + except Exception as err: + show_connection_error(err) + + +# ───────────────────────────────────────────────────────────────────────────── +# PAGE 4 — Worker Coverage +# ───────────────────────────────────────────────────────────────────────────── + +elif page == "worker_coverage": + st.title("👷 Worker Coverage") + st.caption("Station coverage matrix · Certification gaps · Worker roster") + + try: + df_cov = load_worker_coverage() + df_gap = load_worker_cert_gap() + df_workers = load_worker_list() + + # ── KPI row ──────────────────────────────────────────────────────────── + c1, c2, c3, c4 = st.columns(4) + c1.metric("Stations", len(df_cov)) + zero_cov = (df_cov["coverage_depth"] == 0).sum() if not df_cov.empty else 0 + c2.metric("Stations with No Backup", int(zero_cov)) + c3.metric("Workers", len(df_workers)) + c4.metric("Cert Gaps", len(df_gap)) + + st.markdown("---") + + # ── Coverage depth bar ───────────────────────────────────────────────── + st.subheader("Backup Coverage 
Depth per Station") + if not df_cov.empty: + col_depth = [ + "#ef4444" if d == 0 else "#f59e0b" if d <= 2 else "#22c55e" + for d in df_cov["coverage_depth"] + ] + fig_cov = go.Figure( + go.Bar( + x=df_cov["station_code"] + " · " + df_cov["station_name"], + y=df_cov["coverage_depth"], + marker_color=col_depth, + hovertemplate=( + "%{x}
" + "Backup workers: %{y}" + ), + ) + ) + fig_cov.add_hline(y=2, line_color="#888", line_dash="dash", + annotation_text="Min 2 backups", annotation_position="top right") + fig_cov.update_layout( + height=320, + margin=dict(l=20, r=20, t=30, b=90), + xaxis_tickangle=-35, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + yaxis_title="Backup Count", + ) + st.plotly_chart(fig_cov, use_container_width=True) + + # ── Coverage matrix table ────────────────────────────────────────────── + st.subheader("Station Coverage Matrix") + if not df_cov.empty: + def _cov_depth(val): + if isinstance(val, int): + if val == 0: + return "color:#ef4444;font-weight:600" + if val <= 2: + return "color:#f59e0b" + return "color:#22c55e" + return "" + + styled_cov = ( + df_cov[["station_code", "station_name", "primary_str", "backup_str", "coverage_depth"]] + .rename(columns={ + "station_code": "Code", + "station_name": "Station", + "primary_str": "Primary Workers", + "backup_str": "Backup Workers", + "coverage_depth": "Depth", + }) + .style + .applymap(_cov_depth, subset=["Depth"]) + ) + st.dataframe(styled_cov, use_container_width=True, hide_index=True) + + st.markdown("---") + + # ── Certification gap table ──────────────────────────────────────────── + st.subheader("⚠️ Certification Gaps") + st.caption("Stations that require a cert for which no covering worker is currently qualified.") + if df_gap.empty: + st.success("No certification gaps detected.") + else: + st.dataframe( + df_gap.rename(columns={ + "station_code": "Station Code", + "station_name": "Station", + "missing_cert": "Missing Certification", + "cert_id": "Cert ID", + }), + use_container_width=True, + hide_index=True, + ) + + st.markdown("---") + + # ── Worker roster ────────────────────────────────────────────────────── + st.subheader("Worker Roster") + if not df_workers.empty: + st.dataframe( + df_workers.rename(columns={ + "worker_id": "ID", + "name": "Name", + "role": "Role", + 
"type": "Type", + "hours_per_week": "h/week", + "primary_station": "Station Code", + "primary_station_name": "Station Name", + "cert_count": "Certs", + }), + use_container_width=True, + hide_index=True, + ) + + except Exception as err: + show_connection_error(err) + + +# ───────────────────────────────────────────────────────────────────────────── +# PAGE 5 — Self-Test +# ───────────────────────────────────────────────────────────────────────────── + +elif page == "self_test": + st.title("🧪 Self-Test") + st.caption( + "Full schema validation suite · " + "Checks connection, node counts, relationship counts, labels, types, and variance query." + ) + + # Expected schema constants (must match schema.md / seed_graph.py exactly) + EXPECTED_LABELS = { + "Project", "ProductionEntry", "Station", "Product", + "Worker", "Week", "CapacitySnapshot", "Certification", "BOP", + } + + EXPECTED_REL_TYPES = { + "HAS_RUN", "USES_PRODUCT", "PROCESSED_AT", "SCHEDULED_IN", + "REQUIRES_STATION", "STRUCTURED_BY", "PRIMARILY_AT", "CAN_COVER", + "WORKED_ON", "HOLDS", "REQUIRES_CERT", "HAS_SNAPSHOT", + } + + MIN_NODES = 50 + MIN_RELS = 100 + + def badge(ok: bool, ok_text: str = "PASS", fail_text: str = "FAIL") -> str: + cls = "badge-ok" if ok else "badge-fail" + text = ok_text if ok else fail_text + return f'{text}' + + if st.button("▶ Run All Tests", type="primary"): + results = [] + + # ── Test 1: Neo4j connection ───────────────────────────────────────── + with st.spinner("1/6 · Testing connection …"): + try: + drv = get_driver() + conn_ok = True + conn_msg = "Connected successfully" + except Exception as e: + conn_ok = False + conn_msg = str(e) + results.append(("Neo4j Connection", conn_ok, conn_msg)) + + if conn_ok: + + # ── Test 2: Node labels ────────────────────────────────────────── + with st.spinner("2/6 · Checking node labels …"): + try: + rows = run_query("CALL db.labels() YIELD label RETURN label") + actual_labels = {r["label"] for r in rows} + missing = EXPECTED_LABELS - 
actual_labels + extra = actual_labels - EXPECTED_LABELS + labels_ok = len(missing) == 0 + label_msg = ( + f"Found {len(actual_labels)} labels. " + + (f"Missing: {missing}" if missing else "All 9 expected labels present.") + + (f" Extra: {extra}" if extra else "") + ) + except Exception as e: + labels_ok = False + label_msg = str(e) + results.append(("Node Labels (9 expected)", labels_ok, label_msg)) + + # ── Test 3: Relationship types ─────────────────────────────────── + with st.spinner("3/6 · Checking relationship types …"): + try: + rows = run_query("CALL db.relationshipTypes() YIELD relationshipType AS rel RETURN rel") + actual_rels = {r["rel"] for r in rows} + missing_r = EXPECTED_REL_TYPES - actual_rels + extra_r = actual_rels - EXPECTED_REL_TYPES + rels_ok = len(missing_r) == 0 + rel_msg = ( + f"Found {len(actual_rels)} relationship types. " + + (f"Missing: {missing_r}" if missing_r else "All 12 expected types present.") + + (f" Extra: {extra_r}" if extra_r else "") + ) + except Exception as e: + rels_ok = False + rel_msg = str(e) + results.append(("Relationship Types (12 expected)", rels_ok, rel_msg)) + + # ── Test 4: Node count ─────────────────────────────────────────── + with st.spinner("4/6 · Counting nodes …"): + node_counts = {} + try: + for label in sorted(EXPECTED_LABELS): + rows = run_query(f"MATCH (n:{label}) RETURN count(n) AS c") + node_counts[label] = rows[0]["c"] if rows else 0 + total_nodes = sum(node_counts.values()) + count_ok = total_nodes >= MIN_NODES + count_msg_parts = [f"{lbl}: {cnt}" for lbl, cnt in sorted(node_counts.items())] + count_msg = f"Total: {total_nodes} ({' · '.join(count_msg_parts)})" + except Exception as e: + count_ok = False + count_msg = str(e) + results.append((f"Node Count (≥ {MIN_NODES})", count_ok, count_msg)) + + # ── Test 5: Relationship count ─────────────────────────────────── + with st.spinner("5/6 · Counting relationships …"): + rel_counts = {} + try: + for rel_type in sorted(EXPECTED_REL_TYPES): + rows = 
run_query(f"MATCH ()-[r:{rel_type}]->() RETURN count(r) AS c") + rel_counts[rel_type] = rows[0]["c"] if rows else 0 + total_rels = sum(rel_counts.values()) + rel_count_ok = total_rels >= MIN_RELS + rc_parts = [f"{t}: {c}" for t, c in sorted(rel_counts.items())] + rel_count_msg = f"Total: {total_rels} ({' · '.join(rc_parts)})" + except Exception as e: + rel_count_ok = False + rel_count_msg = str(e) + results.append((f"Relationship Count (≥ {MIN_RELS})", rel_count_ok, rel_count_msg)) + + # ── Test 6: Variance query (edge properties on PROCESSED_AT) ───── + with st.spinner("6/6 · Running variance query …"): + variance_cypher = textwrap.dedent(""" + MATCH (proj:Project)-[:HAS_RUN]->(entry:ProductionEntry)-[r:PROCESSED_AT]->(s:Station) + WHERE r.actual_hours > r.planned_hours * 1.10 + WITH s, proj, entry, r, + round(100.0 * (r.actual_hours - r.planned_hours) / r.planned_hours, 1) AS variance_pct + RETURN + s.station_code AS station, + s.station_name AS station_name, + count(*) AS overrun_count + ORDER BY overrun_count DESC + LIMIT 5 + """).strip() + try: + var_rows = run_query(variance_cypher) + var_ok = len(var_rows) > 0 + var_msg = ( + f"Returned {len(var_rows)} stations with >10% overruns. " + "Top: " + ( + ", ".join( + f"{r['station']} ({r['overrun_count']} entries)" + for r in var_rows[:3] + ) + ) if var_rows else "No overrun entries found (graph may have perfect plan adherence)." + ) + except Exception as e: + var_ok = False + var_msg = str(e) + results.append(("Variance Query (PROCESSED_AT edge props)", var_ok, var_msg)) + + # ── Render results ──────────────────────────────────────────────────── + st.markdown("---") + passed = sum(1 for _, ok, _ in results if ok) + total = len(results) + + summary_cls = "badge-ok" if passed == total else "badge-warn" if passed >= total // 2 else "badge-fail" + st.markdown( + f'

Results: {passed} / {total} passed

', + unsafe_allow_html=True, + ) + + for name, ok, msg in results: + icon = "✅" if ok else "❌" + with st.expander(f"{icon} {name}", expanded=not ok): + st.markdown( + badge(ok) + f" {msg}", + unsafe_allow_html=True, + ) + + st.markdown("---") + + # ── Node count breakdown chart ───────────────────────────────────────── + if node_counts: + st.subheader("Node Count Breakdown") + nc_df = pd.DataFrame( + [{"Label": k, "Count": v} for k, v in sorted(node_counts.items())] + ) + fig_nc = px.bar( + nc_df, x="Label", y="Count", + color="Count", + color_continuous_scale=["#3b82f6", "#22c55e"], + text="Count", + ) + fig_nc.update_traces(textposition="outside") + fig_nc.update_layout( + height=320, + margin=dict(l=20, r=20, t=20, b=20), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + showlegend=False, + xaxis_tickangle=-30, + coloraxis_showscale=False, + ) + st.plotly_chart(fig_nc, use_container_width=True) + + # ── Relationship count breakdown ─────────────────────────────────────── + if rel_counts: + st.subheader("Relationship Count Breakdown") + rc_df = pd.DataFrame( + [{"Type": k, "Count": v} for k, v in sorted(rel_counts.items())] + ).sort_values("Count", ascending=True) + fig_rc = px.bar( + rc_df, x="Count", y="Type", + orientation="h", + color="Count", + color_continuous_scale=["#8b5cf6", "#f59e0b"], + text="Count", + ) + fig_rc.update_traces(textposition="outside") + fig_rc.update_layout( + height=360, + margin=dict(l=20, r=20, t=20, b=20), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="#e2e8f0"), + showlegend=False, + coloraxis_showscale=False, + ) + st.plotly_chart(fig_rc, use_container_width=True) + + # ── Raw variance query output ────────────────────────────────────────── + if "var_rows" in dir() and var_rows: + st.subheader("Variance Query Output (top stations by overrun count)") + st.dataframe( + pd.DataFrame(var_rows).rename(columns={ + "station": "Station Code", + 
"station_name": "Station Name", + "overrun_count": "Overrun Entries", + }), + use_container_width=True, + hide_index=True, + ) + + else: + st.info("Click **▶ Run All Tests** to validate the graph schema against schema.md.") + + st.markdown("### Test Suite") + tests = [ + ("1", "Neo4j Connection", + "Verifies the driver can reach the database and authenticate."), + ("2", "Node Labels", + "Checks all 9 expected labels are present: Project, ProductionEntry, Station, " + "Product, Worker, Week, CapacitySnapshot, Certification, BOP."), + ("3", "Relationship Types", + "Checks all 12 expected types: HAS_RUN, USES_PRODUCT, PROCESSED_AT, SCHEDULED_IN, " + "REQUIRES_STATION, STRUCTURED_BY, PRIMARILY_AT, CAN_COVER, WORKED_ON, HOLDS, " + "REQUIRES_CERT, HAS_SNAPSHOT."), + ("4", f"Node Count (≥ {MIN_NODES})", + "Per-label counts, total must reach 50+ nodes."), + ("5", f"Relationship Count (≥ {MIN_RELS})", + "Per-type counts, total must reach 100+ relationships."), + ("6", "Variance Query", + "Runs the full overrun detection query against PROCESSED_AT edge properties " + "(planned_hours, actual_hours). 
Confirms edge data is hydrated."), + ] + for num, name, desc in tests: + with st.expander(f"Test {num} — {name}"): + st.write(desc) diff --git a/submissions/abhinav-chaudhary/level6/factory_capacity.csv b/submissions/abhinav-chaudhary/level6/factory_capacity.csv new file mode 100644 index 000000000..795ff52f0 --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/factory_capacity.csv @@ -0,0 +1,9 @@ +week,own_staff_count,hired_staff_count,own_hours,hired_hours,overtime_hours,total_capacity,total_planned,deficit +w1,10,2,400,80,0,480,612,-132 +w2,10,2,400,80,40,520,645,-125 +w3,10,2,400,80,0,480,398,82 +w4,10,2,400,80,20,500,550,-50 +w5,10,2,400,80,30,510,480,30 +w6,9,2,360,80,0,440,520,-80 +w7,10,2,400,80,40,520,600,-80 +w8,10,2,400,80,20,500,470,30 \ No newline at end of file diff --git a/submissions/abhinav-chaudhary/level6/factory_production.csv b/submissions/abhinav-chaudhary/level6/factory_production.csv new file mode 100644 index 000000000..ca6ce43e1 --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/factory_production.csv @@ -0,0 +1,69 @@ +project_id,project_number,project_name,product_type,unit,quantity,unit_factor,station_code,station_name,etapp,bop,week,planned_hours,actual_hours,completed_units +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w1,48.0,45.2,28 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w1,32.0,35.5,25 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,013,Montering IQB,ET1,BOP1,w1,28.0,26.0,22 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,014,Svets o montage IQB,ET1,BOP1,w1,35.0,38.2,20 +P01,4501,Stålverket Borås,SB,styck,40,4.0,018,SB B/F-hall,ET1,BOP1,w1,16.0,14.5,4 +P01,4501,Stålverket Borås,SP,styck,180,2.0,019,SP B/F-hall,ET1,BOP1,w1,12.0,13.0,7 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w2,48.0,50.0,32 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w2,32.0,30.0,28 +P01,4501,Stålverket Borås,IQP,styck,90,2.80,015,Montering 
IQP,ET1,BOP2,w2,25.0,28.0,9 +P01,4501,Stålverket Borås,SR,styck,8,45.0,021,SR B/F-hall,ET1,BOP2,w2,40.0,42.0,1 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w1,30.0,28.0,20 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,012,Förmontering IQB,ET1,BOP1,w1,22.0,24.5,18 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,013,Montering IQB,ET1,BOP1,w1,18.0,17.0,16 +P02,4502,Kontorshus Mölndal,IQP,styck,70,2.70,015,Montering IQP,ET1,BOP1,w1,19.0,21.0,7 +P02,4502,Kontorshus Mölndal,SD,styck,30,3.00,018,SB B/F-hall,ET1,BOP1,w1,9.0,8.5,3 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w2,30.0,32.0,24 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,014,Svets o montage IQB,ET1,BOP1,w2,25.0,23.0,20 +P02,4502,Kontorshus Mölndal,SP,styck,120,1.75,019,SP B/F-hall,ET1,BOP2,w2,14.0,15.5,8 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w1,72.0,70.0,40 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,012,Förmontering IQB,ET1,BOP1,w1,48.0,52.0,35 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,013,Montering IQB,ET1,BOP1,w1,38.0,36.5,30 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,014,Svets o montage IQB,ET1,BOP1,w1,42.0,48.0,28 +P03,4503,Lagerhall Jönköping,SB,styck,60,6.00,018,SB B/F-hall,ET1,BOP1,w1,36.0,38.0,6 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w2,72.0,75.0,45 +P03,4503,Lagerhall Jönköping,IQP,styck,110,2.90,015,Montering IQP,ET1,BOP2,w2,32.0,30.0,11 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,016,Gjutning,ET1,BOP2,w2,28.0,35.0,8 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,017,Målning,ET1,BOP2,w3,24.0,22.0,20 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w1,38.0,36.0,24 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,012,Förmontering IQB,ET1,BOP1,w1,25.0,27.0,20 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,013,Montering IQB,ET1,BOP1,w1,20.0,19.0,18 +P04,4504,Parkering Helsingborg,IQP,styck,55,2.85,015,Montering IQP,ET1,BOP1,w1,16.0,18.0,6 
+P04,4504,Parkering Helsingborg,SB,styck,25,7.50,018,SB B/F-hall,ET1,BOP1,w1,19.0,22.0,3 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w2,38.0,40.0,28 +P04,4504,Parkering Helsingborg,SP,styck,100,2.00,019,SP B/F-hall,ET1,BOP2,w2,12.0,11.0,6 +P04,4504,Parkering Helsingborg,SR,styck,12,120.0,021,SR B/F-hall,ET1,BOP2,w2,60.0,65.0,1 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w1,95.0,90.0,50 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,012,Förmontering IQB,ET2,BOP3,w1,65.0,68.0,42 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,013,Montering IQB,ET2,BOP3,w1,50.0,48.0,38 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,014,Svets o montage IQB,ET2,BOP3,w1,58.0,62.0,35 +P05,4505,Sjukhus Linköping ET2,IQP,styck,150,2.88,015,Montering IQP,ET2,BOP3,w1,30.0,33.0,10 +P05,4505,Sjukhus Linköping ET2,SB,styck,50,5.00,018,SB B/F-hall,ET2,BOP3,w1,25.0,28.0,5 +P05,4505,Sjukhus Linköping ET2,SD,styck,45,2.75,018,SB B/F-hall,ET2,BOP3,w1,12.0,11.5,4 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w2,95.0,98.0,55 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,016,Gjutning,ET2,BOP3,w2,35.0,40.0,12 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,017,Målning,ET2,BOP3,w2,28.0,26.0,25 +P05,4505,Sjukhus Linköping ET2,SR,styck,20,274.0,021,SR B/F-hall,ET2,BOP3,w3,120.0,115.0,2 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,011,FS IQB,ET1,BOP1,w2,40.0,38.0,26 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,012,Förmontering IQB,ET1,BOP1,w2,28.0,30.0,22 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,013,Montering IQB,ET1,BOP1,w2,22.0,20.0,18 +P06,4506,Skola Uppsala,IQP,styck,80,2.75,015,Montering IQP,ET1,BOP1,w2,22.0,24.0,8 +P06,4506,Skola Uppsala,SB,styck,35,4.50,018,SB B/F-hall,ET1,BOP1,w2,16.0,18.0,4 +P06,4506,Skola Uppsala,SP,styck,140,1.50,019,SP B/F-hall,ET1,BOP2,w3,14.0,12.0,10 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w1,45.0,42.0,22 +P07,4507,Idrottshall 
Västerås,HSQ,meter,400,2.05,012,Förmontering IQB,ET1,BOP1,w1,30.0,33.0,18 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,014,Svets o montage IQB,ET1,BOP1,w1,35.0,32.0,16 +P07,4507,Idrottshall Västerås,SB,styck,45,3.50,018,SB B/F-hall,ET1,BOP1,w1,16.0,18.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w2,45.0,48.0,26 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,016,Gjutning,ET1,BOP2,w2,20.0,22.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,017,Målning,ET1,BOP2,w3,18.0,16.0,15 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w1,65.0,62.0,36 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,012,Förmontering IQB,ET1,BOP1,w1,42.0,45.0,30 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,013,Montering IQB,ET1,BOP1,w1,35.0,38.0,25 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,014,Svets o montage IQB,ET1,BOP1,w1,40.0,44.0,22 +P08,4508,Bro E6 Halmstad,SP,styck,200,2.50,019,SP B/F-hall,ET1,BOP1,w1,20.0,18.0,8 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w2,65.0,68.0,42 +P08,4508,Bro E6 Halmstad,IQP,styck,95,2.93,015,Montering IQP,ET1,BOP2,w2,28.0,30.0,10 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,016,Gjutning,ET1,BOP2,w3,22.0,25.0,8 +P08,4508,Bro E6 Halmstad,SR,styck,15,180.0,021,SR B/F-hall,ET1,BOP2,w3,90.0,85.0,2 \ No newline at end of file diff --git a/submissions/abhinav-chaudhary/level6/factory_workers.csv b/submissions/abhinav-chaudhary/level6/factory_workers.csv new file mode 100644 index 000000000..3110285cc --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/factory_workers.csv @@ -0,0 +1,15 @@ +worker_id,name,role,primary_station,can_cover_stations,certifications,hours_per_week,type +W01,Erik Lindberg,Operator,011,"011,012","MIG/MAG,TIG,ISO 9606",40,permanent +W02,Anna Berg,Operator,011,"011,014","MIG/MAG,TIG",40,permanent +W03,Lars Jensen,Operator,012,"012,013","Surface treatment,CE marking",40,permanent +W04,Maria Stone,Operator,013,"013","Blasting,Surface protection",40,permanent +W05,Johan 
Peters,Operator,014,"014,015","Hydraulics,Mechanics,Crane",40,permanent +W06,Karen Nilsen,Inspector,015,"015","SIS,SS-EN 1090,NDT",40,permanent +W07,Per Hansen,Operator,016,"016,017","Casting,Formwork",40,permanent +W08,Sofia Arden,Operator,017,"017","Surface treatment,Spray painting",40,permanent +W09,Magnus Stone,Operator,018,"018,019","Sheet metal,Assembly",40,permanent +W10,Elin Frank,Operator,019,"019,018","Assembly,Welding",32,permanent +W11,Victor Elm,Foreman,all,"011,012,013,014,015,016,017,018,019,021","Leadership,CE,ISO 9001",45,permanent +W12,Lena Dale,Quality Manager,015,"015","ISO 9001,SS-EN 1090,Audit",40,permanent +W13,Ahmed Hassan,Operator,011,"011","MIG/MAG",40,hired +W14,Petra Steen,Operator,012,"012,013","Surface treatment",40,hired \ No newline at end of file diff --git a/submissions/abhinav-chaudhary/level6/requirements.txt b/submissions/abhinav-chaudhary/level6/requirements.txt new file mode 100644 index 000000000..15213677a --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/requirements.txt @@ -0,0 +1,5 @@ +streamlit +neo4j +python-dotenv +pandas +plotly \ No newline at end of file diff --git a/submissions/abhinav-chaudhary/level6/seed_graph.py b/submissions/abhinav-chaudhary/level6/seed_graph.py new file mode 100644 index 000000000..cd5c4ab54 --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/seed_graph.py @@ -0,0 +1,717 @@ +""" +seed_graph.py — Level 6 Neo4j Graph Seed Script +================================================ +Swedish steel fabrication factory knowledge graph. 
+ +Loads: + - factory_production.csv → Project, Product, Station, ProductionEntry, BOP + - factory_workers.csv → Worker, Certification + - factory_capacity.csv → Week, CapacitySnapshot + +Relationships created (12 types, 100+ instances): + HAS_RUN, USES_PRODUCT, PROCESSED_AT*, SCHEDULED_IN*, REQUIRES_STATION, + STRUCTURED_BY, PRIMARILY_AT, CAN_COVER, WORKED_ON, HOLDS, + REQUIRES_CERT, HAS_SNAPSHOT + + * PROCESSED_AT carries: planned_hours, actual_hours, completed_units + * SCHEDULED_IN carries: planned_hours, actual_hours + +Usage: + pip install neo4j python-dotenv + Copy your .env next to this file (see .env.example below), then: + python seed_graph.py + +.env.example: + NEO4J_URI=bolt://localhost:7687 + NEO4J_USER=neo4j + NEO4J_PASSWORD=your_password +""" + +import csv +import os +import sys +from pathlib import Path +from dotenv import load_dotenv +from neo4j import GraphDatabase + +# ───────────────────────────────────────────── +# 0. Configuration +# ───────────────────────────────────────────── +load_dotenv() + +NEO4J_URI = os.getenv("NEO4J_URI") +NEO4J_USER = os.getenv("NEO4J_USER") +NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD") + +# CSVs must be in the same directory as this script +BASE_DIR = Path(__file__).parent +PRODUCTION_CSV = BASE_DIR / "factory_production.csv" +WORKERS_CSV = BASE_DIR / "factory_workers.csv" +CAPACITY_CSV = BASE_DIR / "factory_capacity.csv" + +# Station → certifications required (derived from worker cert domain knowledge +# and the answers.md analysis; used to build REQUIRES_CERT edges) +STATION_CERT_MAP = { + "011": ["MIG/MAG"], + "012": ["Surface treatment", "CE marking"], + "013": ["Surface treatment"], + "014": ["MIG/MAG", "TIG"], + "015": ["SIS", "SS-EN 1090", "NDT"], + "016": ["Casting", "Formwork"], + "017": ["Surface treatment", "Spray painting"], + "018": ["Sheet metal", "Assembly"], + "019": ["Assembly", "Welding"], + "021": ["CE", "ISO 9001"], +} + + +# ───────────────────────────────────────────── +# 1. 
# CSV helpers
# ─────────────────────────────────────────────

def load_csv(path: Path) -> list[dict]:
    """Read a CSV file and return its rows as a list of dicts.

    The file is decoded as ``utf-8-sig`` so a byte-order mark (commonly added
    when a CSV is re-saved from a spreadsheet tool) is not glued onto the
    first column name, which would make ``row["project_id"]`` raise
    ``KeyError``. Files without a BOM decode exactly as plain UTF-8.
    """
    with open(path, newline="", encoding="utf-8-sig") as fh:
        return list(csv.DictReader(fh))


def split_field(value: str) -> list[str]:
    """Split a comma-separated field into stripped, non-empty parts.

    ``None`` (what ``csv.DictReader`` yields for a column missing from a short
    row) and the empty string both give an empty list instead of raising.
    """
    if not value:
        return []
    return [part for part in (raw.strip() for raw in value.split(",")) if part]


def _cert_id(cert_name: str) -> str:
    """Derive the ``Certification.cert_id`` key from a certification name.

    Single source of truth for the slug so Certification nodes, HOLDS edges
    and REQUIRES_CERT edges can never disagree on the key.
    """
    return cert_name.lower().replace(" ", "_").replace("/", "_")


def _entry_id(row: dict) -> str:
    """Build the ``ProductionEntry.entry_id`` for one production CSV row.

    Format: ``<project_id>_<station_code>_<week>_<product_type>``.
    The product type is part of the key because one project can run two
    products at the same station in the same week (P05 / 018 / w1 has both an
    SB and an SD row); without it MERGE collapses both rows into one node.
    """
    return (
        f"{row['project_id']}_{row['station_code']}_{row['week']}"
        f"_{row['product_type']}"
    )


def _first_by_key(rows: list[dict], key: str) -> list[dict]:
    """Return the first row seen for each distinct value of *key*, in file order."""
    first: dict[str, dict] = {}
    for row in rows:
        first.setdefault(row[key], row)
    return list(first.values())


# ─────────────────────────────────────────────
# 2. Constraint / index creation
# ─────────────────────────────────────────────

CONSTRAINTS = [
    # Uniqueness constraint per node label + primary key
    ("Project", "project_id"),
    ("ProductionEntry", "entry_id"),
    ("Station", "station_code"),
    ("Product", "product_type"),
    ("Worker", "worker_id"),
    ("Week", "week_id"),
    ("Certification", "cert_id"),
    ("BOP", "bop_id"),
    # CapacitySnapshot has no natural standalone PK; it's always reached via
    # Week so no separate uniqueness constraint needed.
]


def create_constraints(session) -> None:
    """Create uniqueness constraints idempotently (Neo4j 5.x syntax).

    Issues one ``CREATE CONSTRAINT ... IF NOT EXISTS`` statement per
    ``(label, property)`` pair in ``CONSTRAINTS``. Labels and properties come
    only from that module constant, so interpolating them into the statement
    text is safe.
    """
    print(" Creating uniqueness constraints …")
    for label, prop in CONSTRAINTS:
        name = f"unique_{label.lower()}_{prop}"
        cypher = (
            f"CREATE CONSTRAINT {name} IF NOT EXISTS "
            f"FOR (n:{label}) REQUIRE n.{prop} IS UNIQUE"
        )
        session.run(cypher)
    print(f" ✓ {len(CONSTRAINTS)} constraints ensured.")


# ─────────────────────────────────────────────
# 3. Node seeding
# ─────────────────────────────────────────────

def seed_projects_products_stations_bops_entries(session, rows: list[dict]) -> None:
    """Seed everything derived from factory_production.csv.

    Creates Project, Product, Station, BOP and ProductionEntry nodes and the
    HAS_RUN, USES_PRODUCT, PROCESSED_AT, SCHEDULED_IN, REQUIRES_STATION and
    STRUCTURED_BY relationships. Every statement uses MERGE, so re-running
    does not duplicate nodes or edges; properties are only written when a
    node/edge is first created (``ON CREATE SET``).

    Args:
        session: object exposing ``run(query, **params)`` (a Neo4j session).
        rows: row dicts as returned by ``load_csv`` for the production CSV.
    """
    print(" Merging Project / Product / Station / BOP nodes …")

    # ── Projects ──────────────────────────────
    projects = [
        {
            "project_id": r["project_id"],
            "project_number": r["project_number"],
            "project_name": r["project_name"],
            "etapp": r["etapp"],
        }
        for r in _first_by_key(rows, "project_id")
    ]
    session.run(
        """
        UNWIND $projects AS p
        MERGE (n:Project {project_id: p.project_id})
        ON CREATE SET
            n.project_number = p.project_number,
            n.project_name   = p.project_name,
            n.etapp          = p.etapp
        """,
        projects=projects,
    )
    print(f" ✓ {len(projects)} Project nodes")

    # ── Products ──────────────────────────────
    # NOTE(review): unit_factor differs between projects for the same
    # product_type in the CSV; only the first row's value is kept on the
    # Product node. Confirm whether it should live on the entry instead.
    products = [
        {
            "product_type": r["product_type"],
            "unit": r["unit"],
            "unit_factor": r["unit_factor"],
        }
        for r in _first_by_key(rows, "product_type")
    ]
    session.run(
        """
        UNWIND $products AS p
        MERGE (n:Product {product_type: p.product_type})
        ON CREATE SET
            n.unit        = p.unit,
            n.unit_factor = toFloat(p.unit_factor)
        """,
        products=products,
    )
    print(f" ✓ {len(products)} Product nodes")

    # ── Stations ──────────────────────────────
    stations = [
        {"station_code": r["station_code"], "station_name": r["station_name"]}
        for r in _first_by_key(rows, "station_code")
    ]
    session.run(
        """
        UNWIND $stations AS s
        MERGE (n:Station {station_code: s.station_code})
        ON CREATE SET n.station_name = s.station_name
        """,
        stations=stations,
    )
    print(f" ✓ {len(stations)} Station nodes")

    # ── BOPs ──────────────────────────────────
    bops = [
        {"bop_id": r["bop"], "etapp": r["etapp"]}
        for r in _first_by_key(rows, "bop")
    ]
    session.run(
        """
        UNWIND $bops AS b
        MERGE (n:BOP {bop_id: b.bop_id})
        ON CREATE SET n.etapp = b.etapp
        """,
        bops=bops,
    )
    print(f" ✓ {len(bops)} BOP nodes")

    # ── ProductionEntry nodes ─────────────────
    # One node per CSV row; see _entry_id() for the key format.
    entries = [
        {
            "entry_id": _entry_id(r),
            "planned_hours": float(r["planned_hours"]),
            "actual_hours": float(r["actual_hours"]),
            "completed_units": int(r["completed_units"]),
            "quantity": int(r["quantity"]),
            # FK references for relationship creation below
            "project_id": r["project_id"],
            "product_type": r["product_type"],
            "station_code": r["station_code"],
            "bop_id": r["bop"],
            "week_id": r["week"],
        }
        for r in rows
    ]
    session.run(
        """
        UNWIND $entries AS e
        MERGE (n:ProductionEntry {entry_id: e.entry_id})
        ON CREATE SET
            n.planned_hours   = e.planned_hours,
            n.actual_hours    = e.actual_hours,
            n.completed_units = e.completed_units,
            n.quantity        = e.quantity
        """,
        entries=entries,
    )
    print(f" ✓ {len(entries)} ProductionEntry nodes")

    # ─────────────────────────────────────────
    # Relationships from production data
    # ─────────────────────────────────────────
    print(" Creating production relationships …")

    # HAS_RUN: Project → ProductionEntry
    session.run(
        """
        UNWIND $entries AS e
        MATCH (proj:Project {project_id: e.project_id})
        MATCH (pe:ProductionEntry {entry_id: e.entry_id})
        MERGE (proj)-[:HAS_RUN]->(pe)
        """,
        entries=entries,
    )
    print(f" ✓ HAS_RUN ({len(entries)} edges)")

    # USES_PRODUCT: ProductionEntry → Product
    session.run(
        """
        UNWIND $entries AS e
        MATCH (pe:ProductionEntry {entry_id: e.entry_id})
        MATCH (pr:Product {product_type: e.product_type})
        MERGE (pe)-[:USES_PRODUCT]->(pr)
        """,
        entries=entries,
    )
    print(f" ✓ USES_PRODUCT ({len(entries)} edges)")

    # PROCESSED_AT: ProductionEntry → Station [★ with properties]
    session.run(
        """
        UNWIND $entries AS e
        MATCH (pe:ProductionEntry {entry_id: e.entry_id})
        MATCH (st:Station {station_code: e.station_code})
        MERGE (pe)-[r:PROCESSED_AT]->(st)
        ON CREATE SET
            r.planned_hours   = e.planned_hours,
            r.actual_hours    = e.actual_hours,
            r.completed_units = e.completed_units
        """,
        entries=entries,
    )
    print(f" ✓ PROCESSED_AT ({len(entries)} edges, with planned_hours / actual_hours / completed_units)")

    # SCHEDULED_IN: ProductionEntry → Week [★ with properties]
    # The Week is MERGEd rather than MATCHed so these edges are created even
    # when factory_capacity.csv has not been seeded yet. The capacity seeding
    # MERGEs Week on the same week_id key, so no duplicate Week nodes result.
    session.run(
        """
        UNWIND $entries AS e
        MATCH (pe:ProductionEntry {entry_id: e.entry_id})
        MERGE (w:Week {week_id: e.week_id})
        MERGE (pe)-[r:SCHEDULED_IN]->(w)
        ON CREATE SET
            r.planned_hours = e.planned_hours,
            r.actual_hours  = e.actual_hours
        """,
        entries=entries,
    )
    print(f" ✓ SCHEDULED_IN ({len(entries)} edges, with planned_hours / actual_hours)")

    # REQUIRES_STATION: Project → Station (derived — distinct pairs)
    proj_station_pairs = sorted({(r["project_id"], r["station_code"]) for r in rows})
    session.run(
        """
        UNWIND $pairs AS pair
        MATCH (proj:Project {project_id: pair[0]})
        MATCH (st:Station {station_code: pair[1]})
        MERGE (proj)-[:REQUIRES_STATION]->(st)
        """,
        pairs=[list(pair) for pair in proj_station_pairs],
    )
    print(f" ✓ REQUIRES_STATION ({len(proj_station_pairs)} edges)")

    # STRUCTURED_BY: Project → BOP
    proj_bop_pairs = sorted({(r["project_id"], r["bop"]) for r in rows})
    session.run(
        """
        UNWIND $pairs AS pair
        MATCH (proj:Project {project_id: pair[0]})
        MATCH (b:BOP {bop_id: pair[1]})
        MERGE (proj)-[:STRUCTURED_BY]->(b)
        """,
        pairs=[list(pair) for pair in proj_bop_pairs],
    )
    print(f" ✓ STRUCTURED_BY ({len(proj_bop_pairs)} edges)")


def seed_weeks_and_snapshots(session, rows: list[dict]) -> None:
    """Seed Week and CapacitySnapshot nodes from factory_capacity.csv.

    Creates one Week and one CapacitySnapshot per CSV row, joined by a
    HAS_SNAPSHOT edge. All hour/capacity columns are parsed with ``int``, so
    a non-integer value raises ``ValueError``.

    Args:
        session: object exposing ``run(query, **params)`` (a Neo4j session).
        rows: row dicts as returned by ``load_csv`` for the capacity CSV.
    """
    print(" Merging Week / CapacitySnapshot nodes …")

    weeks = [
        {
            "week_id": r["week"],
            "own_hours": int(r["own_hours"]),
            "hired_hours": int(r["hired_hours"]),
            "overtime_hours": int(r["overtime_hours"]),
            "total_capacity": int(r["total_capacity"]),
            "total_planned": int(r["total_planned"]),
            "deficit": int(r["deficit"]),
        }
        for r in rows
    ]

    # Week nodes (lightweight time anchors)
    session.run(
        """
        UNWIND $weeks AS w
        MERGE (n:Week {week_id: w.week_id})
        """,
        weeks=weeks,
    )
    print(f" ✓ {len(weeks)} Week nodes")

    # CapacitySnapshot nodes keyed by week_id so MERGE stays idempotent;
    # the snapshot itself has no natural standalone PK, so we embed week_id
    # as a surrogate for idempotency only — it is not exposed as a schema key.
    session.run(
        """
        UNWIND $weeks AS w
        MATCH (wk:Week {week_id: w.week_id})
        MERGE (cs:CapacitySnapshot {week_id: w.week_id})
        ON CREATE SET
            cs.own_hours      = w.own_hours,
            cs.hired_hours    = w.hired_hours,
            cs.overtime_hours = w.overtime_hours,
            cs.total_capacity = w.total_capacity,
            cs.total_planned  = w.total_planned,
            cs.deficit        = w.deficit
        MERGE (wk)-[:HAS_SNAPSHOT]->(cs)
        """,
        weeks=weeks,
    )
    print(f" ✓ {len(weeks)} CapacitySnapshot nodes")
    print(f" ✓ HAS_SNAPSHOT ({len(weeks)} edges)")


def seed_workers_and_certs(session, rows: list[dict]) -> None:
    """Seed Worker and Certification nodes from factory_workers.csv.

    Creates the PRIMARILY_AT, CAN_COVER, HOLDS and WORKED_ON relationships.
    Certification nodes are created for every cert named by a worker and for
    every cert listed in ``STATION_CERT_MAP``, so the REQUIRES_CERT edges
    made by ``seed_requires_cert`` always find their target node.

    The Station (and, for WORKED_ON, ProductionEntry / PROCESSED_AT) data is
    only MATCHed here, so the production CSV must be seeded first; otherwise
    those relationships are silently not created.

    Args:
        session: object exposing ``run(query, **params)`` (a Neo4j session).
        rows: row dicts as returned by ``load_csv`` for the workers CSV.
    """
    print(" Merging Worker / Certification nodes …")

    # ── Certification nodes ───────────────────
    # Collect all unique cert names across all workers
    all_certs: set[str] = set()
    for r in rows:
        all_certs.update(split_field(r["certifications"]))
    # Also include certs from STATION_CERT_MAP
    for certs in STATION_CERT_MAP.values():
        all_certs.update(certs)

    cert_list = [
        {
            "cert_id": _cert_id(cert),
            "name": cert,
            "issuing_body": _issuing_body(cert),
        }
        for cert in sorted(all_certs)
    ]
    session.run(
        """
        UNWIND $certs AS c
        MERGE (n:Certification {cert_id: c.cert_id})
        ON CREATE SET
            n.name         = c.name,
            n.issuing_body = c.issuing_body
        """,
        certs=cert_list,
    )
    print(f" ✓ {len(cert_list)} Certification nodes")

    # ── Worker nodes ──────────────────────────
    workers = []
    for r in rows:
        # primary_station may be "all" for foremen (or blank) — normalise to
        # None so no PRIMARILY_AT edge is attempted for it.
        primary = (r["primary_station"] or "").strip()
        workers.append(
            {
                "worker_id": r["worker_id"],
                "name": r["name"],
                "role": r["role"],
                "hours_per_week": int(r["hours_per_week"]),
                "type": r["type"],
                "primary_station": (
                    primary if primary and primary.lower() != "all" else None
                ),
                "can_cover": split_field(r["can_cover_stations"]),
                "certifications": split_field(r["certifications"]),
            }
        )

    session.run(
        """
        UNWIND $workers AS w
        MERGE (n:Worker {worker_id: w.worker_id})
        ON CREATE SET
            n.name           = w.name,
            n.role           = w.role,
            n.hours_per_week = w.hours_per_week,
            n.type           = w.type
        """,
        workers=workers,
    )
    print(f" ✓ {len(workers)} Worker nodes")

    # ── PRIMARILY_AT: Worker → Station ────────
    # Skip workers whose primary_station is "all" (Victor Elm, foreman)
    primary_pairs = [
        {"worker_id": w["worker_id"], "station_code": w["primary_station"]}
        for w in workers
        if w["primary_station"] is not None
    ]
    session.run(
        """
        UNWIND $pairs AS p
        MATCH (w:Worker {worker_id: p.worker_id})
        MATCH (s:Station {station_code: p.station_code})
        MERGE (w)-[:PRIMARILY_AT]->(s)
        """,
        pairs=primary_pairs,
    )
    print(f" ✓ PRIMARILY_AT ({len(primary_pairs)} edges)")

    # ── CAN_COVER: Worker → Station ───────────
    can_cover_pairs = [
        {"worker_id": w["worker_id"], "station_code": sc}
        for w in workers
        for sc in w["can_cover"]
    ]
    session.run(
        """
        UNWIND $pairs AS p
        MATCH (w:Worker {worker_id: p.worker_id})
        MATCH (s:Station {station_code: p.station_code})
        MERGE (w)-[:CAN_COVER]->(s)
        """,
        pairs=can_cover_pairs,
    )
    print(f" ✓ CAN_COVER ({len(can_cover_pairs)} edges)")

    # ── HOLDS: Worker → Certification ─────────
    holds_pairs = [
        {"worker_id": w["worker_id"], "cert_id": _cert_id(cert_name)}
        for w in workers
        for cert_name in w["certifications"]
    ]
    session.run(
        """
        UNWIND $pairs AS p
        MATCH (w:Worker {worker_id: p.worker_id})
        MATCH (c:Certification {cert_id: p.cert_id})
        MERGE (w)-[:HOLDS]->(c)
        """,
        pairs=holds_pairs,
    )
    print(f" ✓ HOLDS ({len(holds_pairs)} edges)")

    # ── WORKED_ON: Worker → ProductionEntry ───
    # Heuristic: a worker WORKED_ON every ProductionEntry whose station_code
    # is in their can_cover list (they could staff that station).
    # The foreman (W11, can_cover all stations) is linked to all entries.
    worked_on_pairs = [
        {
            "worker_id": w["worker_id"],
            # de-duplicated; sorted only to keep the parameter deterministic
            "station_codes": sorted(set(w["can_cover"])),
        }
        for w in workers
    ]
    session.run(
        """
        UNWIND $workers AS w
        MATCH (wk:Worker {worker_id: w.worker_id})
        MATCH (pe:ProductionEntry)-[:PROCESSED_AT]->(s:Station)
        WHERE s.station_code IN w.station_codes
        MERGE (wk)-[:WORKED_ON]->(pe)
        """,
        workers=worked_on_pairs,
    )
    print(" ✓ WORKED_ON (all worker-station-entry assignments merged)")


def seed_requires_cert(session) -> None:
    """Create REQUIRES_CERT (Station → Certification) edges.

    Pairs come from the domain-derived ``STATION_CERT_MAP``. Both endpoints
    are MATCHed, so Station and Certification nodes must already exist.

    Args:
        session: object exposing ``run(query, **params)`` (a Neo4j session).
    """
    print(" Creating REQUIRES_CERT relationships …")
    pairs = [
        {"station_code": station_code, "cert_id": _cert_id(cert_name)}
        for station_code, cert_names in STATION_CERT_MAP.items()
        for cert_name in cert_names
    ]
    session.run(
        """
        UNWIND $pairs AS p
        MATCH (s:Station {station_code: p.station_code})
        MATCH (c:Certification {cert_id: p.cert_id})
        MERGE (s)-[:REQUIRES_CERT]->(c)
        """,
        pairs=pairs,
    )
    print(f" ✓ REQUIRES_CERT ({len(pairs)} edges)")


# ─────────────────────────────────────────────
# 4.
Utility helpers +# ───────────────────────────────────────────── + +def _issuing_body(cert_name: str) -> str: + """Map a certification name to a plausible issuing body.""" + mapping = { + "MIG/MAG": "Swedish Welding Commission", + "TIG": "Swedish Welding Commission", + "ISO 9606": "ISO / SIS", + "Surface treatment": "Swedish Corrosion Institute", + "CE marking": "EU / Notified Body", + "Blasting": "Swedish Corrosion Institute", + "Surface protection": "Swedish Corrosion Institute", + "Hydraulics": "Internal", + "Mechanics": "Internal", + "Crane": "Swedish Work Environment Authority", + "SIS": "SIS Swedish Standards Institute", + "SS-EN 1090": "SIS Swedish Standards Institute", + "NDT": "BINDT / PCN", + "Casting": "Swedish Foundry Association", + "Formwork": "Internal", + "Spray painting": "Swedish Corrosion Institute", + "Sheet metal": "Internal", + "Assembly": "Internal", + "Welding": "Swedish Welding Commission", + "Leadership": "Internal", + "CE": "EU / Notified Body", + "ISO 9001": "ISO / SIS", + "ISO 9001,SS-EN 1090,Audit": "ISO / SIS", + "Audit": "Internal", + } + return mapping.get(cert_name, "Internal") + + +# ───────────────────────────────────────────── +# 5. 
# Verification report
# ─────────────────────────────────────────────


def print_graph_summary(session) -> None:
    """Print per-label node counts, per-type relationship counts and a target check.

    Every count is obtained with a plain Cypher ``MATCH … RETURN count(…)``
    query, so the summary does not depend on the APOC plugin being installed.

    Args:
        session: Open Neo4j session; only ``session.run(query)`` and
            ``.single()["c"]`` on its result are used.

    Returns:
        None. The report is written to stdout.
    """
    min_nodes, min_rels, min_rel_types = 50, 100, 8

    print("\n" + "═" * 50)
    print(" Graph summary")
    print("═" * 50)

    labels = [
        "Project", "ProductionEntry", "Station", "Product",
        "Worker", "Week", "CapacitySnapshot", "Certification", "BOP",
    ]
    total_nodes = 0
    for label in labels:
        # Labels come from the fixed list above, never from user input, so
        # interpolating them into the query text is safe.
        result = session.run(f"MATCH (n:{label}) RETURN count(n) AS c")
        count = result.single()["c"]
        total_nodes += count
        print(f" {label:<22} {count:>4} nodes")

    print(f" {'─'*36}")
    print(f" {'TOTAL':<22} {total_nodes:>4} nodes")

    rel_types = [
        "HAS_RUN", "USES_PRODUCT", "PROCESSED_AT", "SCHEDULED_IN",
        "REQUIRES_STATION", "STRUCTURED_BY", "PRIMARILY_AT", "CAN_COVER",
        "WORKED_ON", "HOLDS", "REQUIRES_CERT", "HAS_SNAPSHOT",
    ]
    total_rels = 0
    populated_rel_types = 0  # relationship types with at least one edge
    print()
    for rel in rel_types:
        result = session.run(f"MATCH ()-[r:{rel}]->() RETURN count(r) AS c")
        count = result.single()["c"]
        total_rels += count
        if count > 0:
            populated_rel_types += 1
        print(f" [:{rel}]{' ' * max(1, 22 - len(rel))} {count:>4} edges")

    print(f" {'─'*36}")
    print(f" {'TOTAL':<22} {total_rels:>4} relationships")
    print("═" * 50)
    print()

    # The success message names three targets, so all three are verified.
    targets_met = (
        total_nodes >= min_nodes
        and total_rels >= min_rels
        and populated_rel_types >= min_rel_types
    )
    if targets_met:
        print(" ✅ Targets met: 50+ nodes, 100+ relationships, 8+ relationship types")
    else:
        print(
            f" ⚠️ Check targets — nodes: {total_nodes} / 50, rels: {total_rels} / 100"
            f", rel types: {populated_rel_types} / 8"
        )
    print()


# ─────────────────────────────────────────────
# 6.
Main entry point +# ───────────────────────────────────────────── + +def main() -> None: + # Validate CSV files exist + for path in (PRODUCTION_CSV, WORKERS_CSV, CAPACITY_CSV): + if not path.exists(): + print(f"ERROR: CSV not found: {path}", file=sys.stderr) + sys.exit(1) + + # Load CSVs + production_rows = load_csv(PRODUCTION_CSV) + workers_rows = load_csv(WORKERS_CSV) + capacity_rows = load_csv(CAPACITY_CSV) + + print(f"\nLoaded {len(production_rows)} production rows, " + f"{len(workers_rows)} worker rows, " + f"{len(capacity_rows)} capacity rows.\n") + + driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) + + with driver.session() as session: + print("── Step 1: Constraints ──────────────────────") + create_constraints(session) + + print("\n── Step 2: Weeks & Capacity Snapshots ───────") + seed_weeks_and_snapshots(session, capacity_rows) + + print("\n── Step 3: Projects / Products / Stations / BOPs / ProductionEntries ──") + seed_projects_products_stations_bops_entries(session, production_rows) + + print("\n── Step 4: Workers & Certifications ─────────") + seed_workers_and_certs(session, workers_rows) + + print("\n── Step 5: Station → Certification requirements ──") + seed_requires_cert(session) + + print_graph_summary(session) + + driver.close() + print("Done. 
Graph is ready.") + + +if __name__ == "__main__": + main() From 4b1da5f1bb53c0fe464d5e727456d7a67232caed Mon Sep 17 00:00:00 2001 From: Abhinav Chaudhary Date: Wed, 13 May 2026 02:18:45 +0530 Subject: [PATCH 3/6] Create .env.example --- submissions/abhinav-chaudhary/level6/.env.example | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 submissions/abhinav-chaudhary/level6/.env.example diff --git a/submissions/abhinav-chaudhary/level6/.env.example b/submissions/abhinav-chaudhary/level6/.env.example new file mode 100644 index 000000000..fae1dac40 --- /dev/null +++ b/submissions/abhinav-chaudhary/level6/.env.example @@ -0,0 +1,3 @@ +NEO4J_URI= +NEO4J_USER= +NEO4J_PASSWORD= From 7344b62d5fc6a27b24ef5ed00a5f77a5720c7d88 Mon Sep 17 00:00:00 2001 From: Abhinav Chaudhary Date: Wed, 13 May 2026 02:55:16 +0530 Subject: [PATCH 4/6] Added Dev Container Folder --- .devcontainer/devcontainer.json | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..c2c6c253b --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. 
More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bookworm", + "customizations": { + "codespaces": { + "openFiles": [ + "README.md", + "agent/agent.py" + ] + }, + "vscode": { + "settings": {}, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y Date: Wed, 13 May 2026 03:07:12 +0530 Subject: [PATCH 5/6] Update app.py --- submissions/abhinav-chaudhary/level6/app.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/submissions/abhinav-chaudhary/level6/app.py b/submissions/abhinav-chaudhary/level6/app.py index c37a2ef2f..8ef006c21 100644 --- a/submissions/abhinav-chaudhary/level6/app.py +++ b/submissions/abhinav-chaudhary/level6/app.py @@ -472,7 +472,7 @@ def _colour_variance(val): "variance_pct": "Variance %", }) .style - .applymap(_colour_variance, subset=["Variance %"]) + .map(_colour_variance, subset=["Variance %"]) .format({"Planned h": "{:.1f}", "Actual h": "{:.1f}", "Variance %": "{:+.1f}"}) ) st.dataframe(styled, use_container_width=True, hide_index=True) @@ -645,7 +645,7 @@ def _red(val): "variance_pct": "Variance %", }) .style - .applymap(_red, subset=["Variance %"]) + .map(_red, subset=["Variance %"]) .format({"Planned h": "{:.1f}", "Actual h": "{:.1f}", "Variance %": "{:+.1f}"}) ) st.dataframe(styled_ov, use_container_width=True, hide_index=True) @@ -778,7 +778,7 @@ def _cap_colour(val): "deficit": "Deficit h", }) .style - .applymap(_cap_colour, subset=["Deficit h"]) + .map(_cap_colour, subset=["Deficit h"]) ) st.dataframe(styled_cap, use_container_width=True, hide_index=True) @@ -862,7 +862,7 @@ def _cov_depth(val): "coverage_depth": "Depth", }) .style - .applymap(_cov_depth, subset=["Depth"]) + .map(_cov_depth, subset=["Depth"]) ) st.dataframe(styled_cov, use_container_width=True, hide_index=True) From 
cc5f075c2bc8dce8f4aef1bd1e274771b4ca9501 Mon Sep 17 00:00:00 2001 From: Abhinav Chaudhary Date: Wed, 13 May 2026 03:10:01 +0530 Subject: [PATCH 6/6] Update DASHBOARD_URL.txt --- submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt b/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt index e69de29bb..9b4e88c30 100644 --- a/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt +++ b/submissions/abhinav-chaudhary/level6/DASHBOARD_URL.txt @@ -0,0 +1 @@ +https://abhinav-factorygraph.streamlit.app/