Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions hospexplorer/ask/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,12 @@ def save_model(self, request, obj, form, change):
obj.modifier = request.user
obj.status = PDFResource.Status.PROCESSING
obj.status_message = "Queued for Knowledge Base upload."

# record the original name so the zip upload's duplicate check sees PDFs
# added through this form too; do it before save() mangles file.name on collision
if not change or "file" in form.changed_data:
obj.original_filename = os.path.basename(obj.file.name)

super().save_model(request, obj, form, change)

transaction.on_commit(
Expand Down Expand Up @@ -347,6 +353,12 @@ def _is_real(name):
for n in real_names:
zip_members.setdefault(os.path.basename(n), n)

# treat a PDF as a duplicate when both its original filename and title
# match an existing PDFResource (including ones saved earlier in this upload)
existing_pdfs = set(
PDFResource.objects.values_list("original_filename", "title")
)

total = 0
saved = 0
queued_ids = []
Expand All @@ -361,7 +373,12 @@ def _is_real(name):
)
continue

member = zip_members.get(filename) or zip_members.get(os.path.basename(filename))
basename = os.path.basename(filename)
if (basename, title) in existing_pdfs:
messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.")
continue

member = zip_members.get(filename) or zip_members.get(basename)
if not member:
messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.")
continue
Expand All @@ -374,13 +391,15 @@ def _is_real(name):

obj = PDFResource(
title=title,
original_filename=basename,
creator=request.user,
modifier=request.user,
status=PDFResource.Status.PROCESSING,
status_message="Queued for Knowledge Base upload.",
)
obj.file.save(os.path.basename(filename), ContentFile(pdf_bytes), save=True)
obj.file.save(basename, ContentFile(pdf_bytes), save=True)
saved += 1
existing_pdfs.add((basename, title))
queued_ids.append(obj.pk)

# fire KB uploads after the request transaction commits so background
Expand Down
16 changes: 16 additions & 0 deletions hospexplorer/ask/migrations/0013_pdfresource_original_filename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``original_filename`` field to the ``pdfresource`` model."""

    # Applied on top of migration 0012 of the ``ask`` app.
    dependencies = [
        ("ask", "0012_pdfresource_status_pdfresource_status_message_and_more"),
    ]

    operations = [
        # New text field, up to 255 characters; may be left blank and
        # defaults to the empty string.
        migrations.AddField(
            model_name="pdfresource",
            name="original_filename",
            field=models.CharField(blank=True, default="", max_length=255),
        ),
    ]
2 changes: 2 additions & 0 deletions hospexplorer/ask/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class Meta:

class PDFResource(Resource):
file = models.FileField(upload_to="kb_pdfs/")
# original upload name, kept so re-uploads can be skipped — Django renames file.name on collision
original_filename = models.CharField(max_length=255, blank=True, default="")
mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.")

class Meta:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
{% endblock %}

{% block content %}
<h1>{{ title }}</h1>
<p>
Upload a <code>.zip</code> containing PDF files and a single CSV metadata file
with columns <code>{{ required_columns_label }}</code>. Each row creates a PDF Resource
Expand Down