From fa530e6b61332882e40d50b4e9269522db614a9f Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Fri, 3 Apr 2026 16:38:06 -0700 Subject: [PATCH 1/3] [HOP-54] Added kb connector for adding pdf, changed kb compare to check pdfs in mcp that are not in hopper and vice versa, added views and urls for pulling pdfs from mcp, frontend --- .gitignore | 3 + hospexplorer/ask/admin.py | 29 +- hospexplorer/ask/kb_connector.py | 30 ++ .../ask/migrations/0011_pdfresource.py | 34 ++ hospexplorer/ask/models.py | 9 + hospexplorer/ask/templates/kb/resources.html | 491 +++++++++++++----- hospexplorer/ask/urls.py | 4 +- hospexplorer/ask/views.py | 145 +++++- hospexplorer/hospexplorer/settings.py | 5 + hospexplorer/hospexplorer/urls.py | 2 +- 10 files changed, 602 insertions(+), 150 deletions(-) create mode 100644 hospexplorer/ask/migrations/0011_pdfresource.py diff --git a/.gitignore b/.gitignore index 838db98..c661710 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ logs/*.log # Django generated statics staticfiles +# Django generated media +media/ + # Virtual environments .venv diff --git a/hospexplorer/ask/admin.py b/hospexplorer/ask/admin.py index 35219fa..3052f24 100644 --- a/hospexplorer/ask/admin.py +++ b/hospexplorer/ask/admin.py @@ -2,8 +2,8 @@ from django.contrib import admin -from ask.models import Conversation, TermsAcceptance, QARecord, SimWorkflow, WebsiteResource -from ask.kb_connector import add_website_to_kb +from ask.models import Conversation, TermsAcceptance, QARecord, SimWorkflow, WebsiteResource, PDFResource +from ask.kb_connector import add_website_to_kb, add_pdf_to_kb logger = logging.getLogger(__name__) @@ -158,3 +158,28 @@ def save_model(self, request, obj, form, change): except Exception as e: logger.exception("Failed to send website to KB: %s", obj.url) self.message_user(request, f"Website saved but failed to send to Knowledge Base: {e}", level="warning") + + +@admin.register(PDFResource) +class PDFResourceAdmin(admin.ModelAdmin): + list_display = 
("title", "file", "creator", "modified_at", "mcp_kb_document_id") + search_fields = ("title",) + readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id") + + def save_model(self, request, obj, form, change): + if not change: + obj.creator = request.user + obj.modifier = request.user + super().save_model(request, obj, form, change) + + try: + obj.file.open("rb") + file_bytes = obj.file.read() + obj.file.close() + result = add_pdf_to_kb(file_bytes, obj.file.name.split("/")[-1], obj.title) + obj.mcp_kb_document_id = result.get("doc_id") + obj.save(update_fields=["mcp_kb_document_id"]) + self.message_user(request, f"PDF '{obj.title}' sent to Knowledge Base (doc_id={obj.mcp_kb_document_id}).") + except Exception as e: + logger.exception("Failed to send PDF to KB: %s", obj.file.name) + self.message_user(request, f"PDF saved but failed to send to Knowledge Base: {e}", level="warning") diff --git a/hospexplorer/ask/kb_connector.py b/hospexplorer/ask/kb_connector.py index 469a71a..49a9a07 100644 --- a/hospexplorer/ask/kb_connector.py +++ b/hospexplorer/ask/kb_connector.py @@ -54,6 +54,36 @@ def add_website_to_kb(url): return response.json() +def add_pdf_to_kb(file_bytes, filename, title, url=None): + """Upload a PDF to the MCP KB server for ingestion. + + Calls POST /docs/pdf/add on the MCP KB server with multipart form data. + The KB server extracts text, chunks it, generates embeddings, + and stores it for semantic search. 
+ """ + headers = { + "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}", + } + endpoint = f"{settings.KB_MCP_HOST}/docs/pdf/add" + + files = {"file": (filename, file_bytes, "application/pdf")} + data = {"title": title} + if url: + data["url"] = url + + with httpx.Client() as client: + response = client.post( + endpoint, + headers=headers, + files=files, + data=data, + timeout=settings.KB_MCP_TIMEOUT, + ) + + response.raise_for_status() + return response.json() + + def delete_kb_document(doc_id): """Delete a document from the MCP KB server by its ID. diff --git a/hospexplorer/ask/migrations/0011_pdfresource.py b/hospexplorer/ask/migrations/0011_pdfresource.py new file mode 100644 index 0000000..1f8ebb9 --- /dev/null +++ b/hospexplorer/ask/migrations/0011_pdfresource.py @@ -0,0 +1,34 @@ +# Generated by Django 6.0.2 on 2026-04-03 23:17 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('ask', '0010_websiteresource_mcp_kb_document_id'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='PDFResource', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('title', models.CharField(max_length=255)), + ('description', models.TextField(blank=True, default='')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('modified_at', models.DateTimeField(auto_now=True)), + ('file', models.FileField(upload_to='kb_pdfs/')), + ('mcp_kb_document_id', models.IntegerField(blank=True, help_text='Document ID returned by the MCP Knowledge Base.', null=True)), + ('creator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created', to=settings.AUTH_USER_MODEL)), + ('modifier', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, 
related_name='%(class)s_modified', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'PDF Resource', + 'verbose_name_plural': 'PDF Resources', + }, + ), + ] diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py index e0ceeaa..53f186e 100644 --- a/hospexplorer/ask/models.py +++ b/hospexplorer/ask/models.py @@ -38,6 +38,15 @@ class Meta: verbose_name_plural = "Website Resources" +class PDFResource(Resource): + file = models.FileField(upload_to="kb_pdfs/") + mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.") + + class Meta: + verbose_name = "PDF Resource" + verbose_name_plural = "PDF Resources" + + class QueryTask(models.Model): class Status(models.TextChoices): PENDING = "pending", "Pending" diff --git a/hospexplorer/ask/templates/kb/resources.html b/hospexplorer/ask/templates/kb/resources.html index f69e49c..c856509 100644 --- a/hospexplorer/ask/templates/kb/resources.html +++ b/hospexplorer/ask/templates/kb/resources.html @@ -51,141 +51,276 @@

Knowledge Base Resources

- -
-
- - - - - - - - {# can_add/can_change/can_delete are Django's default model permissions — assign via admin or a "Curator" group #} - {% if can_change %}{% endif %} - - - - {% for resource in page_obj %} - - - - - - {% if can_change %} - - {% endif %} - - {% empty %} - - - - {% endfor %} - -
TitleURLLast IncludedKB StatusActions
{{ resource.title }} - - {{ resource.url|truncatechars:60 }} - - {{ resource.modified_at|date:"N j, Y, P" }} - -- - - -
No resources found in the internal database.
-
-
- - -
-
Documents in KB but not tracked internally
-
-
- - - - - - {% if can_add or can_delete %}{% endif %} - - - - - -
TitleURLActions
+ {% empty %} + + No PDF resources found. + + {% endfor %} + + +
-
- - - {% if page_obj.has_other_pages %} - - {% endif %} + +