# ---------------------------------------------------------------------------
# hospexplorer/ask/admin.py
# (this module imports PDFResource from ask.models and add_pdf_to_kb from
# ask.kb_connector, and defines the module-level `logger`)
# ---------------------------------------------------------------------------


@admin.register(PDFResource)
class PDFResourceAdmin(admin.ModelAdmin):
    """Admin for ``PDFResource`` that forwards each saved PDF to the KB."""

    list_display = ("title", "file", "creator", "modified_at", "mcp_kb_document_id")
    search_fields = ("title",)
    # Audit fields and the KB document ID are set by save_model, never by hand.
    readonly_fields = ("created_at", "modified_at", "creator", "modifier", "mcp_kb_document_id")

    def save_model(self, request, obj, form, change):
        """Save the resource, then upload its file to the Knowledge Base.

        ``creator`` is set only when the object is first created; ``modifier``
        is set on every save. After the normal admin save, the stored file is
        read and passed to ``add_pdf_to_kb``; the ``doc_id`` from the result
        is stored in ``mcp_kb_document_id``.

        A failure while reading or uploading does not undo the save: it is
        logged with a traceback and reported to the user as a warning.
        """
        if not change:
            obj.creator = request.user
        obj.modifier = request.user
        super().save_model(request, obj, form, change)

        # NOTE(review): the upload runs on every save, including edits that do
        # not touch the file -- confirm the KB server tolerates re-submission.
        try:
            # The context manager closes the handle even if read() raises;
            # the previous open()/read()/close() sequence leaked it on error.
            with obj.file.open("rb") as pdf_file:
                file_bytes = pdf_file.read()

            # Send only the last path component of the stored name.
            upload_name = obj.file.name.split("/")[-1]
            result = add_pdf_to_kb(file_bytes, upload_name, obj.title)

            obj.mcp_kb_document_id = result.get("doc_id")
            # Write just the KB ID column; the other fields were saved above.
            obj.save(update_fields=["mcp_kb_document_id"])
            self.message_user(request, f"PDF '{obj.title}' sent to Knowledge Base (doc_id={obj.mcp_kb_document_id}).")
        except Exception as e:
            # Admin boundary: keep the saved row, log the traceback, warn the user.
            logger.exception("Failed to send PDF to KB: %s", obj.file.name)
            self.message_user(request, f"PDF saved but failed to send to Knowledge Base: {e}", level="warning")


# ---------------------------------------------------------------------------
# hospexplorer/ask/kb_connector.py
# ---------------------------------------------------------------------------


def add_pdf_to_kb(file_bytes, filename, title, url=None):
    """Upload a PDF to the MCP KB server for ingestion.

    Calls POST /docs/pdf/add on the MCP KB server with multipart form data.
    The KB server extracts text, chunks it, generates embeddings,
    and stores it for semantic search.

    Args:
        file_bytes: Content of the PDF, sent as the ``file`` form part with
            content type ``application/pdf``.
        filename: File name reported for the uploaded part.
        title: Value of the ``title`` form field.
        url: Optional value of the ``url`` form field; the field is omitted
            when this is ``None`` or empty.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        plus any transport error raised by the HTTP client.
    """
    headers = {
        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
    }
    endpoint = f"{settings.KB_MCP_HOST}/docs/pdf/add"

    files = {"file": (filename, file_bytes, "application/pdf")}
    data = {"title": title}
    if url:
        data["url"] = url

    with httpx.Client() as client:
        response = client.post(
            endpoint,
            headers=headers,
            files=files,
            data=data,
            timeout=settings.KB_MCP_TIMEOUT,
        )

    response.raise_for_status()
    return response.json()
# ---------------------------------------------------------------------------
# hospexplorer/ask/migrations/0011_pdfresource.py
# Generated by Django 6.0.2 on 2026-04-03 23:17
# ---------------------------------------------------------------------------

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``PDFResource`` model."""

    dependencies = [
        ("ask", "0010_websiteresource_mcp_kb_document_id"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="PDFResource",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("title", models.CharField(max_length=255)),
                ("description", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("modified_at", models.DateTimeField(auto_now=True)),
                ("file", models.FileField(upload_to="kb_pdfs/")),
                (
                    "mcp_kb_document_id",
                    models.IntegerField(
                        blank=True,
                        help_text="Document ID returned by the MCP Knowledge Base.",
                        null=True,
                    ),
                ),
                (
                    "creator",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s_created",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    "modifier",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="%(class)s_modified",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "verbose_name": "PDF Resource",
                "verbose_name_plural": "PDF Resources",
            },
        ),
    ]


# ---------------------------------------------------------------------------
# hospexplorer/ask/models.py
# ---------------------------------------------------------------------------


class PDFResource(Resource):
    """A ``Resource`` backed by an uploaded PDF file.

    ``mcp_kb_document_id`` holds the document ID returned by the MCP
    Knowledge Base; it is nullable and may be left blank.
    """

    # Uploaded files are stored under the "kb_pdfs/" upload path.
    file = models.FileField(upload_to="kb_pdfs/")
    mcp_kb_document_id = models.IntegerField(
        null=True,
        blank=True,
        help_text="Document ID returned by the MCP Knowledge Base.",
    )

    class Meta:
        verbose_name = "PDF Resource"
        verbose_name_plural = "PDF Resources"

Knowledge Base Resources

- -
-
- - - - - - - - {# can_add/can_change/can_delete are Django's default model permissions — assign via admin or a "Curator" group #} - {% if can_change %}{% endif %} - - - - {% for resource in page_obj %} - - - - - - {% if can_change %} - - {% endif %} - - {% empty %} - - - - {% endfor %} - -
TitleURLLast IncludedKB StatusActions
{{ resource.title }} - - {{ resource.url|truncatechars:60 }} - - {{ resource.modified_at|date:"N j, Y, P" }} - -- - - -
No resources found in the internal database.
-
-
- - -
-
Documents in KB but not tracked internally
-
-
- - - - - - {% if can_add or can_delete %}{% endif %} - - - - - -
TitleURLActions
+ {% empty %} + + No PDF resources found. + + {% endfor %} + + +
-
- - - {% if page_obj.has_other_pages %} - - {% endif %} + +