From acb767d8403dc5447a59bee97cbee5de8d338e60 Mon Sep 17 00:00:00 2001
From: Prakhar-Sethi012 <jisethi37@gmail.com>
Date: Thu, 11 Jun 2026 01:41:09 +0530
Subject: [PATCH 1/3] chore: scaffold FastAPI backend with Docker, pgvector Postgres and Alembic

---
 .dockerignore            |   5 ++
 .env.example             |   6 ++
 .gitignore               |  54 ++++++++++++++
 Dockerfile               |  13 ++++
 alembic.ini              | 149 +++++++++++++++++++++++++++++++++++++++
 alembic/README           |   1 +
 alembic/env.py           |  79 +++++++++++++++++++++
 alembic/script.py.mako   |  28 ++++++++
 app/database/database.py |  23 ++++++
 app/database/models.py   |  17 +++++
 app/main.py              |   7 ++
 docker-compose.yml       |  31 ++++++++
 requirements.txt         | Bin 0 -> 920 bytes
 13 files changed, 413 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 alembic.ini
 create mode 100644 alembic/README
 create mode 100644 alembic/env.py
 create mode 100644 alembic/script.py.mako
 create mode 100644 app/database/database.py
 create mode 100644 app/database/models.py
 create mode 100644 app/main.py
 create mode 100644 docker-compose.yml
 create mode 100644 requirements.txt

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..dda73b3
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,5 @@
+.venv
+__pycache__
+.git
+.env
+alembic/versions
\ No newline at end of file
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..ccc76e2
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,6 @@
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=postgres
+POSTGRES_DB=wheretf
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+DATABASE_URL=postgresql+psycopg://postgres:postgres@localhost:5432/wheretf
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ced36aa
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,54 @@
+# ======================
+# Virtual Environment
+# ======================
+.venv/
+venv/
+ENV/
+env/
+
+# ======================
+# Python cache
+# ======================
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
+# ======================
+# Environment variables
+# ======================
+.env
+
+# ======================
+# IDEs
+# ======================
+.vscode/
+.idea/
+
+# ======================
+# Testing / coverage
+# ======================
+.pytest_cache/
+.coverage
+htmlcov/
+
+# ======================
+# Logs
+# ======================
+*.log
+
+# ======================
+# Alembic
+# ======================
+alembic/versions/__pycache__/
+
+# ======================
+# Docker
+# ======================
+docker-compose.override.yml
+
+# ======================
+# OS files
+# ======================
+.DS_Store
+Thumbs.db
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..194e62d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.13-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 8000
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/alembic.ini b/alembic.ini
new file mode 100644
index 0000000..d142807
--- /dev/null
+++ b/alembic.ini
@@ -0,0 +1,149 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts.
+# this is typically a path given in POSIX (e.g. forward slashes)
+# format, relative to the token %(here)s which refers to the location of this
+# ini file
+script_location = %(here)s/alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+# Or organize into date-based subdirectories (requires recursive_version_locations = true)
+# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.  for multiple paths, the path separator
+# is defined by "path_separator" below.
+prepend_sys_path = .
+
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the tzdata library which can be installed by adding
+# `alembic[tz]` to the pip requirements.
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to <script_location>/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "path_separator"
+# below.
+# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+# path_separator; This indicates what character is used to split lists of file
+# paths, including version_locations and prepend_sys_path within configparser
+# files such as alembic.ini.
+# The default rendered in new alembic.ini files is "os", which uses os.pathsep
+# to provide os-dependent path splitting.
+#
+# Note that in order to support legacy alembic.ini files, this default does NOT
+# take place if path_separator is not present in alembic.ini.  If this
+# option is omitted entirely, fallback logic is as follows:
+#
+# 1. Parsing of the version_locations option falls back to using the legacy
+#    "version_path_separator" key, which if absent then falls back to the legacy
+#    behavior of splitting on spaces and/or commas.
+# 2. Parsing of the prepend_sys_path option falls back to the legacy
+#    behavior of splitting on spaces, commas, or colons.
+#
+# Valid values for path_separator are:
+#
+# path_separator = :
+# path_separator = ;
+# path_separator = space
+# path_separator = newline
+#
+# Use os.pathsep. Default configuration used for new projects.
+path_separator = os
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# database URL.  This is consumed by the user-maintained env.py script only.
+# other means of configuring database URLs may be customized within the env.py
+# file.
+sqlalchemy.url = postgresql+psycopg://postgres:postgres@localhost:5432/wheretf
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+# hooks = ruff
+# ruff.type = module
+# ruff.module = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Alternatively, use the exec runner to execute a binary found on your PATH
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration.  This is also consumed by the user-maintained
+# env.py script only.
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/README b/alembic/README
new file mode 100644
index 0000000..98e4f9c
--- /dev/null
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 0000000..804ad87
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,79 @@
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+from app.database.database import Base
+from app.database import models
+from alembic import context
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
new file mode 100644
index 0000000..1101630
--- /dev/null
+++ b/alembic/script.py.mako
@@ -0,0 +1,28 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    ${downgrades if downgrades else "pass"}
diff --git a/app/database/database.py b/app/database/database.py
new file mode 100644
index 0000000..89f8575
--- /dev/null
+++ b/app/database/database.py
@@ -0,0 +1,23 @@
+from dotenv import load_dotenv
+import os
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, DeclarativeBase
+
+load_dotenv()
+
+DATABASE_URL = os.getenv(
+    "DATABASE_URL",
+    "postgresql+psycopg://postgres:postgres@db:5432/wheretf"
+)
+engine = create_engine(DATABASE_URL)
+
+SessionLocal = sessionmaker(
+    bind=engine,
+    autoflush=False,
+    autocommit=False,
+)
+
+
+class Base(DeclarativeBase):
+    pass
\ No newline at end of file
diff --git a/app/database/models.py b/app/database/models.py
new file mode 100644
index 0000000..186210f
--- /dev/null
+++ b/app/database/models.py
@@ -0,0 +1,17 @@
+from sqlalchemy import String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+from pgvector.sqlalchemy import Vector
+
+from app.database.database import Base
+
+
+class Document(Base):
+    __tablename__ = "documents"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+
+    title: Mapped[str] = mapped_column(String(255))
+    content: Mapped[str] = mapped_column(Text)
+
+    # pgvector embedding (we'll generate later)
+    embedding: Mapped[list[float]] = mapped_column(Vector(1536))
\ No newline at end of file
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..a4aee24
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,7 @@
+from fastapi import FastAPI
+app = FastAPI(
+    title="WhereTF Backend"
+)
+@app.get("/")
+def root():
+    return {"message": "WhereTF Backend is running"}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..434e457
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,31 @@
+services:
+  db:
+    image: pgvector/pgvector:pg17
+    container_name: wheretf-db
+
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: wheretf
+
+    ports:
+      - "5432:5432"
+
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+
+  backend:
+    build: .
+    container_name: wheretf-backend
+
+    ports:
+      - "8000:8000"
+
+    depends_on:
+      - db
+
+    environment:
+      DATABASE_URL: postgresql+psycopg://postgres:postgres@db:5432/wheretf
+
+volumes:
+  postgres_data:
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0d5e2eb153765efd1147810345a59255a75fa77
GIT binary patch
literal 920
zcmZ`&T~ES55S(Wde+q>1r9SxJyFM6x0Kp2C0BzGA#`@#cnccfm9?a!{+|KOm&Mv<{
z8yvC432)dV$0JXMG4DM;=eWfUHYj;21pW%dzGK8&Mf8L?-T$!x9cuDg%P#DlaZ>J%
z%9X|De9tiJqd99+RR@cDMM8*~lXK*4Sk;MFm3tjyEDf>aYim{4%#z~Dzof#<Fa`Nr
zp3+<hhLZlmDv{0y^|Aq5+=+4bbfJvO;>a^GUify19mz|NP5W5G=$VeJh}%(Hm3hyp
zs*>>&5uL@R%pqsIs~QD9$;#Q8x=(!x;Tko&?#v%yQ+6r%jIQG&o4opHqMndLbjLom
z1EJp#RpfI@8Lu&GQ<t++_xk1A1s&*X#hkuL+NzRW2`$IO@LpDU#WS8botzBq>BSRG
zdl6^hb8g}km2a5KH^%pHPo;vo`fhD6<)k*SG)~$f*B^4$%=$&FYHcIKciQ(>JL8)X
c@NJ&yAL9;8*pD3WtY`MBz1IAOst<jJKPAhBumAu6

literal 0
HcmV?d00001


From a10709e73e776f9656c8a334693e5d104545a7e7 Mon Sep 17 00:00:00 2001
From: Prakhar-Sethi012 <jisethi37@gmail.com>
Date: Thu, 11 Jun 2026 16:24:42 +0530
Subject: [PATCH 2/3] feat: initialize modular database architecture with
 pgvector and separated alembic migrations

---
 alembic/env.py                                | 15 +++---
 ...76cb765_create_file_relationships_table.py | 42 +++++++++++++++++
 .../c1850b307113_create_files_table.py        | 46 +++++++++++++++++++
 .../cdcdffd36592_create_file_content_table.py | 46 +++++++++++++++++++
 app/{database => }/database.py                |  0
 app/database/models.py                        | 17 -------
 app/models/__init__.py                        |  3 ++
 app/models/content.py                         | 31 +++++++++++++
 app/models/file.py                            | 30 ++++++++++++
 app/models/relationship.py                    | 21 +++++++++
 app/schemas/__init__.py                       |  0
 app/schemas/document.py                       | 11 +++++
 docker-compose.yml                            |  2 +-
 13 files changed, 237 insertions(+), 27 deletions(-)
 create mode 100644 alembic/versions/2f98f76cb765_create_file_relationships_table.py
 create mode 100644 alembic/versions/c1850b307113_create_files_table.py
 create mode 100644 alembic/versions/cdcdffd36592_create_file_content_table.py
 rename app/{database => }/database.py (100%)
 delete mode 100644 app/database/models.py
 create mode 100644 app/models/__init__.py
 create mode 100644 app/models/content.py
 create mode 100644 app/models/file.py
 create mode 100644 app/models/relationship.py
 create mode 100644 app/schemas/__init__.py
 create mode 100644 app/schemas/document.py

diff --git a/alembic/env.py b/alembic/env.py
index 804ad87..5d4475a 100644
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -1,9 +1,8 @@
 from logging.config import fileConfig
-
 from sqlalchemy import engine_from_config
-from sqlalchemy import pool
-from app.database.database import Base
-from app.database import models
+from sqlalchemy import pool,create_engine
+from app.database import Base
+import app.models
 from alembic import context
 
 # this is the Alembic Config object, which provides
@@ -58,12 +57,19 @@ def run_migrations_online() -> None:
     and associate a connection with the context.
 
     """
-    connectable = engine_from_config(
-        config.get_section(config.config_ini_section, {}),
-        prefix="sqlalchemy.",
-        poolclass=pool.NullPool,
-    )
+    import os
+
+    # The default targets localhost on the port docker-compose publishes for
+    # the db service (5433), so migrations can be run from the host. Set
+    # ALEMBIC_DATABASE_URL to migrate a different database without editing
+    # this file.
+    url = os.getenv(
+        "ALEMBIC_DATABASE_URL",
+        "postgresql+psycopg://postgres:postgres@localhost:5433/wheretf",
+    )
 
+    # NullPool: a one-shot migration run has no use for pooled connections.
+    connectable = create_engine(url, poolclass=pool.NullPool)
     with connectable.connect() as connection:
         context.configure(
             connection=connection, target_metadata=target_metadata
diff --git a/alembic/versions/2f98f76cb765_create_file_relationships_table.py b/alembic/versions/2f98f76cb765_create_file_relationships_table.py
new file mode 100644
index 0000000..3e24c02
--- /dev/null
+++ b/alembic/versions/2f98f76cb765_create_file_relationships_table.py
@@ -0,0 +1,42 @@
+"""create_file_relationships_table
+
+Revision ID: 2f98f76cb765
+Revises: cdcdffd36592
+Create Date: 2026-06-11 16:18:13.091065
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '2f98f76cb765'
+down_revision: Union[str, Sequence[str], None] = 'cdcdffd36592'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('file_relationships',
+    sa.Column('id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False),
+    sa.Column('source_file_id', sa.UUID(), nullable=False),
+    sa.Column('target_file_id', sa.UUID(), nullable=False),
+    sa.Column('similarity_score', sa.Float(), nullable=False),
+    sa.Column('relation_type', sa.String(length=50), server_default='semantic_similarity', nullable=False),
+    sa.ForeignKeyConstraint(['source_file_id'], ['files.id'], ondelete='CASCADE'),
+    sa.ForeignKeyConstraint(['target_file_id'], ['files.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('source_file_id', 'target_file_id', name='uq_file_relationship')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('file_relationships')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/c1850b307113_create_files_table.py b/alembic/versions/c1850b307113_create_files_table.py
new file mode 100644
index 0000000..5822e0d
--- /dev/null
+++ b/alembic/versions/c1850b307113_create_files_table.py
@@ -0,0 +1,46 @@
+"""create_files_table
+
+Revision ID: c1850b307113
+Revises: 
+Create Date: 2026-06-11 16:12:34.237674
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = 'c1850b307113'
+down_revision: Union[str, Sequence[str], None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('files',
+    sa.Column('id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False),
+    sa.Column('file_path', sa.Text(), nullable=False),
+    sa.Column('file_hash', sa.String(length=64), nullable=False),
+    sa.Column('mime_type', sa.String(length=50), nullable=False),
+    sa.Column('last_modified', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('indexed_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.Column('tags', postgresql.ARRAY(sa.Text()), server_default='{}', nullable=False),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('file_path')
+    )
+    op.create_index('files_path_hash_idx', 'files', ['file_path', 'file_hash'], unique=False)
+    op.create_index('files_tags_idx', 'files', ['tags'], unique=False, postgresql_using='gin')
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index('files_tags_idx', table_name='files', postgresql_using='gin')
+    op.drop_index('files_path_hash_idx', table_name='files')
+    op.drop_table('files')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/cdcdffd36592_create_file_content_table.py b/alembic/versions/cdcdffd36592_create_file_content_table.py
new file mode 100644
index 0000000..6707dea
--- /dev/null
+++ b/alembic/versions/cdcdffd36592_create_file_content_table.py
@@ -0,0 +1,46 @@
+"""create_file_content_table
+
+Revision ID: cdcdffd36592
+Revises: c1850b307113
+Create Date: 2026-06-11 16:14:39.195041
+
+"""
+from typing import Sequence, Union
+import pgvector.sqlalchemy  # load the submodule explicitly; bare "import pgvector" does not expose pgvector.sqlalchemy
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = 'cdcdffd36592'
+down_revision: Union[str, Sequence[str], None] = 'c1850b307113'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector;")
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('file_content',
+    sa.Column('id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False),
+    sa.Column('file_id', sa.UUID(), nullable=False),
+    sa.Column('chunk_index', sa.Integer(), nullable=False),
+    sa.Column('content_text', sa.Text(), nullable=False),
+    sa.Column('embedding', pgvector.sqlalchemy.vector.VECTOR(dim=384), nullable=False),
+    sa.Column('keyword_tokens', postgresql.TSVECTOR(), sa.Computed("to_tsvector('english', content_text)", persisted=True), nullable=True),
+    sa.ForeignKeyConstraint(['file_id'], ['files.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index('content_embedding_idx', 'file_content', ['embedding'], unique=False, postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_cosine_ops'})
+    op.create_index('content_fts_idx', 'file_content', ['keyword_tokens'], unique=False, postgresql_using='gin')
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index('content_fts_idx', table_name='file_content', postgresql_using='gin')
+    op.drop_index('content_embedding_idx', table_name='file_content', postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_cosine_ops'})
+    op.drop_table('file_content')
+    # ### end Alembic commands ###
diff --git a/app/database/database.py b/app/database.py
similarity index 100%
rename from app/database/database.py
rename to app/database.py
diff --git a/app/database/models.py b/app/database/models.py
deleted file mode 100644
index 186210f..0000000
--- a/app/database/models.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from sqlalchemy import String, Text
-from sqlalchemy.orm import Mapped, mapped_column
-from pgvector.sqlalchemy import Vector
-
-from app.database.database import Base
-
-
-class Document(Base):
-    __tablename__ = "documents"
-
-    id: Mapped[int] = mapped_column(primary_key=True)
-
-    title: Mapped[str] = mapped_column(String(255))
-    content: Mapped[str] = mapped_column(Text)
-
-    # pgvector embedding (we'll generate later)
-    embedding: Mapped[list[float]] = mapped_column(Vector(1536))
\ No newline at end of file
diff --git a/app/models/__init__.py b/app/models/__init__.py
new file mode 100644
index 0000000..bf82e82
--- /dev/null
+++ b/app/models/__init__.py
@@ -0,0 +1,3 @@
+from app.models.file import File
+from app.models.content import FileContent
+from app.models.relationship import FileRelationship
\ No newline at end of file
diff --git a/app/models/content.py b/app/models/content.py
new file mode 100644
index 0000000..0690482
--- /dev/null
+++ b/app/models/content.py
@@ -0,0 +1,31 @@
+import uuid
+from sqlalchemy import Integer, Text, ForeignKey, Index, Computed
+from sqlalchemy.dialects.postgresql import UUID, TSVECTOR
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from pgvector.sqlalchemy import Vector
+from sqlalchemy.sql import func
+from app.database import Base
+
+class FileContent(Base):  # presumably one text chunk of a "files" row, with its embedding
+    __tablename__ = "file_content"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid())
+    file_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), ForeignKey("files.id", ondelete="CASCADE"))
+    
+    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)  # NOTE(review): (file_id, chunk_index) is not declared unique - confirm duplicates are intended
+    content_text: Mapped[str] = mapped_column(Text, nullable=False)
+    
+    # 384 dimensions for all-MiniLM-L6-v2; must stay in sync with VECTOR(dim=384) in migration cdcdffd36592
+    embedding: Mapped[list[float]] = mapped_column(Vector(384))
+    
+    # Stored generated column: the database derives it from content_text via to_tsvector('english', ...)
+    keyword_tokens = mapped_column(TSVECTOR, Computed("to_tsvector('english', content_text)", persisted=True))
+
+    # Parent File; paired with File.contents through back_populates
+    file = relationship("File", back_populates="contents")
+
+    # GIN index on keyword_tokens (full-text search); HNSW index with cosine ops on embedding (vector search)
+    __table_args__ = (
+        Index('content_fts_idx', 'keyword_tokens', postgresql_using='gin'),
+        Index('content_embedding_idx', 'embedding', postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_cosine_ops'}),
+    )
\ No newline at end of file
diff --git a/app/models/file.py b/app/models/file.py
new file mode 100644
index 0000000..cd02ede
--- /dev/null
+++ b/app/models/file.py
@@ -0,0 +1,30 @@
+import uuid
+from datetime import datetime
+from sqlalchemy import String, Text, DateTime, Index
+from sqlalchemy.dialects.postgresql import UUID, ARRAY
+from sqlalchemy.sql import func
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.database import Base
+
+class File(Base):  # ORM model for the "files" table; parent of FileContent rows
+    __tablename__ = "files"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid())
+    file_path: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
+    file_hash: Mapped[str] = mapped_column(String(64), nullable=False)  # 64 chars, sized for a hex-encoded SHA-256 digest
+    mime_type: Mapped[str] = mapped_column(String(50), nullable=False)  # NOTE(review): 50 chars is shorter than some real MIME types (the .docx type is 71) - consider widening here and in migration c1850b307113
+    
+    last_modified: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+    indexed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
+    
+    tags: Mapped[list[str]] = mapped_column(ARRAY(Text), server_default='{}')
+
+    # Child FileContent rows; the ORM cascade complements the ON DELETE CASCADE on file_content.file_id
+    contents = relationship("FileContent", back_populates="file", cascade="all, delete-orphan")
+
+    # GIN index on the tags array; composite index on (file_path, file_hash)
+    __table_args__ = (
+        Index('files_tags_idx', 'tags', postgresql_using='gin'),
+        Index('files_path_hash_idx', 'file_path', 'file_hash'),
+    )
\ No newline at end of file
diff --git a/app/models/relationship.py b/app/models/relationship.py
new file mode 100644
index 0000000..7692777
--- /dev/null
+++ b/app/models/relationship.py
@@ -0,0 +1,21 @@
+import uuid
+from sqlalchemy import Float, String, ForeignKey, UniqueConstraint
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.sql import func
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.database import Base
+
+class FileRelationship(Base):  # scored link between two rows of "files"
+    __tablename__ = "file_relationships"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid())
+    source_file_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), ForeignKey("files.id", ondelete="CASCADE"))
+    target_file_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), ForeignKey("files.id", ondelete="CASCADE"))
+    
+    similarity_score: Mapped[float] = mapped_column(Float, nullable=False)
+    relation_type: Mapped[str] = mapped_column(String(50), server_default='semantic_similarity')
+    # Uniqueness is per ordered (source, target) pair: (A, B) and (B, A) may both exist, and relation_type is not part of the key
+    __table_args__ = (
+        UniqueConstraint('source_file_id', 'target_file_id', name='uq_file_relationship'),
+    )
\ No newline at end of file
diff --git a/app/schemas/__init__.py b/app/schemas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/schemas/document.py b/app/schemas/document.py
new file mode 100644
index 0000000..8312171
--- /dev/null
+++ b/app/schemas/document.py
@@ -0,0 +1,11 @@
+from pydantic import BaseModel
+
+
+class DocumentIngest(BaseModel):
+    file_path: str
+    raw_text: str
+class DocumentSearchResponse(BaseModel):
+    filename: str
+    file_path: str
+    snippet: str
+    score: float
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 434e457..767fc1f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,7 +9,7 @@ services:
       POSTGRES_DB: wheretf
 
     ports:
-      - "5432:5432"
+      - "5433:5432"
 
     volumes:
       - postgres_data:/var/lib/postgresql/data

From f51d5cbcbafe2b150c5e9f57043b36f38a5638a7 Mon Sep 17 00:00:00 2001
From: Maneet Gupta <131141424+RK-NerdyBirdy@users.noreply.github.com>
Date: Fri, 12 Jun 2026 20:26:16 +0530
Subject: [PATCH 3/3] Enhance db service with healthcheck and update backend
 dependency

Added healthcheck for the database service and updated backend dependency condition.
---
 docker-compose.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 767fc1f..5b0bee0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,30 +2,30 @@ services:
   db:
     image: pgvector/pgvector:pg17
     container_name: wheretf-db
-
     environment:
       POSTGRES_USER: postgres
       POSTGRES_PASSWORD: postgres
       POSTGRES_DB: wheretf
-
     ports:
       - "5433:5432"
-
     volumes:
       - postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres -d wheretf"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
 
   backend:
     build: .
     container_name: wheretf-backend
-
     ports:
       - "8000:8000"
-
     depends_on:
-      - db
-
+      db:
+        condition: service_healthy
     environment:
       DATABASE_URL: postgresql+psycopg://postgres:postgres@db:5432/wheretf
 
 volumes:
-  postgres_data:
\ No newline at end of file
+  postgres_data: