Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/app/endpoints/conversations_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from authentication import get_auth_dependency
from authorization.middleware import authorize
from cache.cache_entry import CacheEntry
from configuration import configuration
from log import get_logger
from models.api.requests import ConversationUpdateRequest
Expand All @@ -24,7 +25,6 @@
ConversationsListResponseV2,
ConversationUpdateResponse,
)
from models.cache_entry import CacheEntry
from models.common import (
ConversationTurn,
Message,
Expand Down
2 changes: 1 addition & 1 deletion src/app/endpoints/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@
)
from models.api.responses.successful import ResponsesResponse
from models.common.moderation import ShieldModerationBlocked
from models.common.responses.contexts import ResponsesContext
from models.common.responses.responses_api_params import ResponsesApiParams
from models.common.responses.responses_context import ResponsesContext
from models.common.turn_summary import TurnSummary
from models.config import Action
from observability import ResponsesEventData, build_responses_event, send_splunk_event
Expand Down
7 changes: 5 additions & 2 deletions src/app/endpoints/rlsapi_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from constants import ENDPOINT_PATH_INFER
from log import get_logger
from metrics import recording
from models.api.requests.rlsapi import RlsapiV1InferRequest, RlsapiV1SystemInfo
from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
from models.api.responses.error import (
ForbiddenResponse,
Expand All @@ -37,9 +38,11 @@
UnauthorizedResponse,
UnprocessableEntityResponse,
)
from models.api.responses.successful.rlsapi import (
RlsapiV1InferData,
RlsapiV1InferResponse,
)
from models.config import Action
from models.rlsapi.requests import RlsapiV1InferRequest, RlsapiV1SystemInfo
from models.rlsapi.responses import RlsapiV1InferData, RlsapiV1InferResponse
from observability import InferenceEventData, build_inference_event, send_splunk_event
from utils.endpoints import check_configuration_loaded
from utils.query import (
Expand Down
2 changes: 1 addition & 1 deletion src/app/endpoints/streaming_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@
UnprocessableEntityResponse,
)
from models.api.responses.successful import StreamingQueryResponse
from models.common.responses.contexts import ResponseGeneratorContext
from models.common.responses.responses_api_params import ResponsesApiParams
from models.common.turn_summary import ReferencedDocument, TurnSummary
from models.config import Action
from models.context import ResponseGeneratorContext
from utils.conversations import append_turn_items_to_conversation
from utils.endpoints import (
check_configuration_loaded,
Expand Down
3 changes: 3 additions & 0 deletions src/cache/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Various cache implementations.
## [cache.py](cache.py)
Abstract class that is parent for all cache implementations.

## [cache_entry.py](cache_entry.py)
Pydantic model for a conversation history cache entry.

## [cache_error.py](cache_error.py)
Any exception that can occur during cache operations.

Expand Down
2 changes: 1 addition & 1 deletion src/cache/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from abc import ABC, abstractmethod

from models.cache_entry import CacheEntry
from cache.cache_entry import CacheEntry
from models.common import ConversationData
from utils.suid import check_suid

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/cache/in_memory_cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""In-memory cache implementation."""

from cache.cache import Cache
from cache.cache_entry import CacheEntry
from log import get_logger
from models.cache_entry import CacheEntry
from models.common import ConversationData
from models.config import InMemoryCacheConfig
from utils.connection_decorator import connection
Expand Down
2 changes: 1 addition & 1 deletion src/cache/noop_cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""No-operation cache implementation."""

from cache.cache import Cache
from cache.cache_entry import CacheEntry
from log import get_logger
from models.cache_entry import CacheEntry
from models.common import ConversationData
from utils.connection_decorator import connection

Expand Down
2 changes: 1 addition & 1 deletion src/cache/postgres_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from psycopg2.extensions import AsIs

from cache.cache import Cache
from cache.cache_entry import CacheEntry
from cache.cache_error import CacheError
from log import get_logger
from models.cache_entry import CacheEntry
from models.common import ConversationData
from models.common.turn_summary import (
ReferencedDocument,
Expand Down
2 changes: 1 addition & 1 deletion src/cache/sqlite_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from time import time

from cache.cache import Cache
from cache.cache_entry import CacheEntry
from cache.cache_error import CacheError
from log import get_logger
from models.cache_entry import CacheEntry
from models.common import ConversationData
from models.common.turn_summary import (
ReferencedDocument,
Expand Down
9 changes: 0 additions & 9 deletions src/models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,9 @@
## [__init__.py](__init__.py)
Pydantic models.

## [cache_entry.py](cache_entry.py)
Model for conversation history cache entry.

## [config.py](config.py)
Model with service configuration.

## [context.py](context.py)
Context objects for internal operations.

## [api/](api/)
Models for API request and response bodies.

Expand All @@ -20,6 +14,3 @@ Shared models and types used across endpoints.

## [database/](database/)
SQLAlchemy ORM models.

## [rlsapi/](rlsapi/)
Models for the rlsapi v1 API surface.
10 changes: 10 additions & 0 deletions src/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
"""Pydantic models."""

from models import api, common, database
from models.config import Configuration

__all__ = [
"Configuration",
"api",
"common",
"database",
]
3 changes: 3 additions & 0 deletions src/models/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
## [__init__.py](__init__.py)
Typed HTTP API models (OpenAPI-oriented) for FastAPI routes. Exposes the [`responses`](responses/README.md) subpackage.

## [requests/](requests/)
REST request body models, including [`requests/rlsapi.py`](requests/rlsapi.py) for the rlsapi v1 surface.

## [responses/](responses/README.md)
HTTP response shapes (successful payloads, errors, and OpenAPI description constants).
14 changes: 14 additions & 0 deletions src/models/api/requests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
from models.api.requests.prompts import PromptCreateRequest, PromptUpdateRequest
from models.api.requests.query import QueryRequest, StreamingInterruptRequest
from models.api.requests.responses_openai import ResponsesRequest
from models.api.requests.rlsapi import (
RlsapiV1Attachment,
RlsapiV1CLA,
RlsapiV1Context,
RlsapiV1InferRequest,
RlsapiV1SystemInfo,
RlsapiV1Terminal,
)
from models.api.requests.vector_stores import (
VectorStoreCreateRequest,
VectorStoreFileCreateRequest,
Expand All @@ -23,6 +31,12 @@
"PromptUpdateRequest",
"QueryRequest",
"ResponsesRequest",
"RlsapiV1Attachment",
"RlsapiV1CLA",
"RlsapiV1Context",
"RlsapiV1InferRequest",
"RlsapiV1SystemInfo",
"RlsapiV1Terminal",
"StreamingInterruptRequest",
"VectorStoreCreateRequest",
"VectorStoreFileCreateRequest",
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions src/models/api/responses/successful/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,8 @@ Successful response models for synchronous query and streaming query documentati
## [responses_openai.py](responses_openai.py)
Successful response model for the OpenAI-compatible Responses API.

## [rlsapi.py](rlsapi.py)
Successful response payloads for the rlsapi v1 `/infer` endpoint.

## [vector_stores.py](vector_stores.py)
Successful responses for vector stores and vector store files.
6 changes: 6 additions & 0 deletions src/models/api/responses/successful/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
StreamingQueryResponse,
)
from models.api.responses.successful.responses_openai import ResponsesResponse
from models.api.responses.successful.rlsapi import (
RlsapiV1InferData,
RlsapiV1InferResponse,
)
from models.api.responses.successful.vector_stores import (
FileResponse,
VectorStoreDeleteResponse,
Expand Down Expand Up @@ -83,6 +87,8 @@
"RAGListResponse",
"ReadinessResponse",
"ResponsesResponse",
"RlsapiV1InferData",
"RlsapiV1InferResponse",
"ShieldsResponse",
"StatusResponse",
"StreamingInterruptResponse",
Expand Down
4 changes: 2 additions & 2 deletions src/models/common/responses/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
## [responses_api_params.py](responses_api_params.py)
Request parameter model for Llama Stack responses API calls.

## [responses_context.py](responses_context.py)
Request-scoped context model for the responses endpoint pipeline.
## [contexts.py](contexts.py)
Context models for the responses endpoint pipeline and the streaming query response generators.

2 changes: 1 addition & 1 deletion src/models/common/responses/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Shared models for the OpenAI-compatible Responses API pipeline."""

from models.common.responses.contexts import ResponsesContext
from models.common.responses.responses_api_params import ResponsesApiParams
from models.common.responses.responses_context import ResponsesContext
from models.common.responses.responses_conversation_context import (
ResponsesConversationContext,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Request-scoped context model for the responses endpoint pipeline."""
"""Context objects for the responses endpoint pipeline and streaming query generators."""

from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional

from fastapi import BackgroundTasks
from llama_stack_client import AsyncLlamaStackClient
from pydantic import BaseModel, ConfigDict, Field

from models.api.requests import QueryRequest
from models.common.moderation import ShieldModerationResult
from models.common.turn_summary import RAGContext

Expand Down Expand Up @@ -53,3 +55,49 @@ class ResponsesContext(BaseModel):
default=False,
description="Whether to generate a topic summary for new conversations",
)


@dataclass
class ResponseGeneratorContext: # pylint: disable=too-many-instance-attributes
    """
    Context object for response generator creation.

    Bundles every parameter needed to create a response generator for the
    streaming query endpoints into a single object, so the generator factory
    accepts one context argument instead of a dozen separate parameters.

    Attributes:
        conversation_id: The conversation identifier.
        request_id: Unique identifier for the streaming request.
        user_id: The user identifier.
        skip_userid_check: Whether to skip user ID validation.
        model_id: The model identifier.
        query_request: The query request object.
        started_at: Timestamp when the request started (ISO 8601 format).
        client: The Llama Stack client for API interactions.
        moderation_result: The moderation result for the request.
        inline_rag_context: Inline RAG context attached to the query.
        vector_store_ids: Vector store IDs used in the query for source
            resolution; defaults to an empty list.
        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id;
            defaults to an empty dict.
    """

    # Conversation & User context
    conversation_id: str
    request_id: str
    user_id: str
    skip_userid_check: bool

    # Model info
    model_id: str

    # Request & Timing
    query_request: QueryRequest
    started_at: str  # ISO 8601 string, not a datetime object

    # Dependencies & State
    client: AsyncLlamaStackClient
    moderation_result: ShieldModerationResult

    # RAG index identification
    inline_rag_context: RAGContext
    # Mutable defaults must use field(default_factory=...) in a dataclass
    vector_store_ids: list[str] = field(default_factory=list)
    rag_id_mapping: dict[str, str] = field(default_factory=dict)
55 changes: 0 additions & 55 deletions src/models/context.py

This file was deleted.

11 changes: 0 additions & 11 deletions src/models/rlsapi/README.md

This file was deleted.

1 change: 0 additions & 1 deletion src/models/rlsapi/__init__.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/utils/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import constants
from app.database import get_session
from authorization.azure_token_manager import AzureEntraIDManager
from cache.cache_entry import CacheEntry
from cache.cache_error import CacheError
from client import AsyncLlamaStackClientHolder
from configuration import configuration
Expand All @@ -35,7 +36,6 @@
ServiceUnavailableResponse,
UnprocessableEntityResponse,
)
from models.cache_entry import CacheEntry
from models.common.query import Attachment
from models.common.turn_summary import TurnSummary
from models.config import Action
Expand Down
Loading
Loading