diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py index 52bfa4e..04e423d 100644 --- a/hospexplorer/ask/admin.py +++ b/hospexplorer/ask/admin.py @@ -271,6 +271,12 @@ def save_model(self, request, obj, form, change): obj.modifier = request.user obj.status = PDFResource.Status.PROCESSING obj.status_message = "Queued for Knowledge Base upload." + + # record the original name so the zip upload's duplicate check sees PDFs + # added through this form too; do it before save() mangles file.name on collision + if not change or "file" in form.changed_data: + obj.original_filename = os.path.basename(obj.file.name) + super().save_model(request, obj, form, change) transaction.on_commit( @@ -347,6 +353,12 @@ def _is_real(name): for n in real_names: zip_members.setdefault(os.path.basename(n), n) + # a PDF already exists when both its original filename and + # title match a row already imported + existing_pdfs = set( + PDFResource.objects.values_list("original_filename", "title") + ) + total = 0 saved = 0 queued_ids = [] @@ -361,7 +373,12 @@ def _is_real(name): ) continue - member = zip_members.get(filename) or zip_members.get(os.path.basename(filename)) + basename = os.path.basename(filename) + if (basename, title) in existing_pdfs: + messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.") + continue + + member = zip_members.get(filename) or zip_members.get(basename) if not member: messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.") continue @@ -374,13 +391,15 @@ def _is_real(name): obj = PDFResource( title=title, + original_filename=basename, creator=request.user, modifier=request.user, status=PDFResource.Status.PROCESSING, status_message="Queued for Knowledge Base upload.", ) - obj.file.save(os.path.basename(filename), ContentFile(pdf_bytes), save=True) + obj.file.save(basename, ContentFile(pdf_bytes), save=True) saved += 1 + existing_pdfs.add((basename, title)) queued_ids.append(obj.pk) # fire KB uploads after the request transaction commits so background diff --git a/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py new file mode 100644 index 0000000..7161cfa --- /dev/null +++ b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("ask", "0012_pdfresource_status_pdfresource_status_message_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="pdfresource", + name="original_filename", + field=models.CharField(blank=True, default="", max_length=255), + ), + ] diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py index 70fa94b..aefb517 100644 --- a/hospexplorer/ask/models.py +++ b/hospexplorer/ask/models.py @@ -52,6 +52,8 @@ class Meta: class PDFResource(Resource): file = models.FileField(upload_to="kb_pdfs/") + # original upload name, kept so re-uploads can be skipped — Django renames file.name on collision + original_filename = models.CharField(max_length=255, blank=True, default="") mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.") class Meta: diff --git a/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html b/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html index 3b11f2c..4be7d0e 100644 --- a/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html +++ b/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html @@ -10,7 +10,6 @@ {% endblock %} {% block content %} -

{{ title }}

Upload a .zip containing PDF files and a single CSV metadata file with columns {{ required_columns_label }}. Each row creates a PDF Resource