From b548e0715b7a6e49b4883b7065c3d5595b65074e Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Tue, 19 May 2026 16:26:44 -0700 Subject: [PATCH 1/4] [HOP-65] Removed duplicate 'Upload zip of PDFs' title --- hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html | 1 - 1 file changed, 1 deletion(-) diff --git a/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html b/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html index 3b11f2c..4be7d0e 100644 --- a/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html +++ b/hospexplorer/ask/templates/admin/ask/pdfresource/upload_zip.html @@ -10,7 +10,6 @@ {% endblock %} {% block content %} -

{{ title }}

Upload a .zip containing PDF files and a single CSV metadata file with columns {{ required_columns_label }}. Each row creates a PDF Resource From 229483706e9e3409fee3e93bbce81d8872df4ee1 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Wed, 20 May 2026 16:29:06 -0700 Subject: [PATCH 2/4] [HOP-65] Duplicates matched via filename and title --- hospexplorer/ask/admin.py | 20 +++++++++-- .../0013_pdfresource_original_filename.py | 35 +++++++++++++++++++ hospexplorer/ask/models.py | 3 ++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 hospexplorer/ask/migrations/0013_pdfresource_original_filename.py diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py index 52bfa4e..c5fe34d 100644 --- a/hospexplorer/ask/admin.py +++ b/hospexplorer/ask/admin.py @@ -271,6 +271,9 @@ def save_model(self, request, obj, form, change): obj.modifier = request.user obj.status = PDFResource.Status.PROCESSING obj.status_message = "Queued for Knowledge Base upload." + # capture the original name before storage save() mangles it on collision + if not change or "file" in form.changed_data: + obj.original_filename = os.path.basename(obj.file.name) super().save_model(request, obj, form, change) transaction.on_commit( @@ -347,6 +350,12 @@ def _is_real(name): for n in real_names: zip_members.setdefault(os.path.basename(n), n) + # a PDF "already exists" when both its original filename and + # title match a row already imported + existing_pdfs = set( + PDFResource.objects.values_list("original_filename", "title") + ) + total = 0 saved = 0 queued_ids = [] @@ -361,7 +370,12 @@ def _is_real(name): ) continue - member = zip_members.get(filename) or zip_members.get(os.path.basename(filename)) + basename = os.path.basename(filename) + if (basename, title) in existing_pdfs: + messages.warning(request, f"Row {total}: '{filename}' already exists; skipped.") + continue + + member = zip_members.get(filename) or zip_members.get(basename) if not member: messages.warning(request, f"Row {total}: '{filename}' not in zip; skipped.") continue @@ -374,13 +388,15 @@ def _is_real(name): obj = PDFResource( title=title, + original_filename=basename, creator=request.user, modifier=request.user, status=PDFResource.Status.PROCESSING, status_message="Queued for Knowledge Base upload.", ) - obj.file.save(os.path.basename(filename), ContentFile(pdf_bytes), save=True) + obj.file.save(basename, ContentFile(pdf_bytes), save=True) saved += 1 + existing_pdfs.add((basename, title)) queued_ids.append(obj.pk) # fire KB uploads after the request transaction commits so background diff --git a/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py new file mode 100644 index 0000000..5c7aa5c --- /dev/null +++ b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py @@ -0,0 +1,35 @@ +import os +import re + +from django.db import migrations, models + + +# Django's storage appends "_<7 random alphanumeric chars>" to a file name +# whenever the target name already exists. Strip that to recover the original. +_STORAGE_SUFFIX = re.compile(r"_[A-Za-z0-9]{7}$") + + +def backfill_original_filename(apps, schema_editor): + PDFResource = apps.get_model("ask", "PDFResource") + for resource in PDFResource.objects.all(): + if not resource.file or resource.original_filename: + continue + root, ext = os.path.splitext(os.path.basename(resource.file.name)) + resource.original_filename = f"{_STORAGE_SUFFIX.sub('', root)}{ext}" + resource.save(update_fields=["original_filename"]) + + +class Migration(migrations.Migration): + + dependencies = [ + ("ask", "0012_pdfresource_status_pdfresource_status_message_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="pdfresource", + name="original_filename", + field=models.CharField(blank=True, default="", max_length=255), + ), + migrations.RunPython(backfill_original_filename, migrations.RunPython.noop), + ] diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py index 70fa94b..f80d37d 100644 --- a/hospexplorer/ask/models.py +++ b/hospexplorer/ask/models.py @@ -52,6 +52,9 @@ class Meta: class PDFResource(Resource): file = models.FileField(upload_to="kb_pdfs/") + # Original uploaded filename, kept verbatim — file.name carries a storage-added + # suffix on collision, so it can't be used to detect re-uploads of the same file. + original_filename = models.CharField(max_length=255, blank=True, default="") mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.") class Meta: From 00ee781cdbc420173dc317c44006df2343710d85 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 21 May 2026 11:15:10 -0700 Subject: [PATCH 3/4] [HOP-65] Tests work, better comments --- hospexplorer/ask/admin.py | 7 +++++-- hospexplorer/ask/models.py | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py index c5fe34d..04e423d 100644 --- a/hospexplorer/ask/admin.py +++ b/hospexplorer/ask/admin.py @@ -271,9 +271,12 @@ def save_model(self, request, obj, form, change): obj.modifier = request.user obj.status = PDFResource.Status.PROCESSING obj.status_message = "Queued for Knowledge Base upload." - # capture the original name before storage save() mangles it on collision + + # record the original name so the zip upload's duplicate check sees PDFs + # added through this form too; do it before save() mangles file.name on collision if not change or "file" in form.changed_data: obj.original_filename = os.path.basename(obj.file.name) + super().save_model(request, obj, form, change) transaction.on_commit( @@ -350,7 +353,7 @@ def _is_real(name): for n in real_names: zip_members.setdefault(os.path.basename(n), n) - # a PDF "already exists" when both its original filename and + # a PDF already exists when both its original filename and # title match a row already imported existing_pdfs = set( PDFResource.objects.values_list("original_filename", "title") diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py index f80d37d..aefb517 100644 --- a/hospexplorer/ask/models.py +++ b/hospexplorer/ask/models.py @@ -52,8 +52,7 @@ class Meta: class PDFResource(Resource): file = models.FileField(upload_to="kb_pdfs/") - # Original uploaded filename, kept verbatim — file.name carries a storage-added - # suffix on collision, so it can't be used to detect re-uploads of the same file. + # original upload name, kept so re-uploads can be skipped — Django renames file.name on collision original_filename = models.CharField(max_length=255, blank=True, default="") mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.") From 846c5aa4b3c298c310314e0169554bb4a4df6a8d Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Fri, 22 May 2026 15:34:15 -0700 Subject: [PATCH 4/4] [HOP-65] blanks on files fixed --- .../0013_pdfresource_original_filename.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py index 5c7aa5c..7161cfa 100644 --- a/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py +++ b/hospexplorer/ask/migrations/0013_pdfresource_original_filename.py @@ -1,24 +1,6 @@ -import os -import re - from django.db import migrations, models -# Django's storage appends "_<7 random alphanumeric chars>" to a file name -# whenever the target name already exists. Strip that to recover the original. -_STORAGE_SUFFIX = re.compile(r"_[A-Za-z0-9]{7}$") - - -def backfill_original_filename(apps, schema_editor): - PDFResource = apps.get_model("ask", "PDFResource") - for resource in PDFResource.objects.all(): - if not resource.file or resource.original_filename: - continue - root, ext = os.path.splitext(os.path.basename(resource.file.name)) - resource.original_filename = f"{_STORAGE_SUFFIX.sub('', root)}{ext}" - resource.save(update_fields=["original_filename"]) - - class Migration(migrations.Migration): dependencies = [ @@ -31,5 +13,4 @@ class Migration(migrations.Migration): name="original_filename", field=models.CharField(blank=True, default="", max_length=255), ), - migrations.RunPython(backfill_original_filename, migrations.RunPython.noop), ]