diff --git a/docs/okp_guide.md b/docs/okp_guide.md index 31136c539..5d64f1537 100644 --- a/docs/okp_guide.md +++ b/docs/okp_guide.md @@ -148,8 +148,7 @@ okp: * **`rag.inline`** and **`rag.tool`**: Enable OKP as the RAG source for inline context injection and for the RAG tool. Tool rag means the LLM will be provided a search tool it can choose to invoke to find relevant content and augment the user prompt. The tool may or may not be invoked. Inline means a rag search and prompt augmentation will always occur. * **`okp.offline`**: When `true`, source URLs use `parent_id` (offline/Mimir-style). When `false`, use `reference_url` (online). -If you want to filter the docs to a specific product, you can include a query -filter such as: +If you want to filter the docs to a specific product, you can include a static query filter such as: ```yaml okp: @@ -160,6 +159,123 @@ okp: When you launch Lightspeed stack it will augment the Llamastack run.yaml with configuration for OKP. +### Dynamic Metadata Filtering + +In addition to static filters configured in `lightspeed-stack.yaml`, you can apply **dynamic filters** per query using structured filter objects in the request. Dynamic filters are combined with static filters using AND logic. + +#### Supported Filter Operations + +**Comparison Filters:** +- `eq` - Equal to (exact match) +- `ne` - Not equal to +- `in` - Value in list +- `nin` - Value not in list + +**Compound Filters:** +- `and` - All filters must match +- `or` - Any filter must match + +> **Note:** Range operators (`gt`, `gte`, `lt`, `lte`) are not supported because they use lexicographic comparison on string fields, which can produce unexpected results. + +#### Dynamic Filter Examples + +**Simple equality filter:** + +```bash +curl -sX POST http://localhost:8080/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "How to install ansible?", + "solr": { + "mode": "hybrid", + "filters": { + "filters": { + "type": "eq", + "key": "product", + "value": "ansible_automation_platform" + } + } + } + }' +``` + +**Multiple values with 'in' filter:** + +```bash +curl -sX POST http://localhost:8080/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "Security best practices", + "solr": { + "mode": "semantic", + "filters": { + "filters": { + "type": "in", + "key": "product", + "value": ["openshift_container_platform", "ansible_automation_platform", "rhel"] + } + } + } + }' +``` + +**Compound filters (AND/OR):** + +```bash +curl -sX POST http://localhost:8080/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "Advanced configuration", + "solr": { + "mode": "hybrid", + "filters": { + "filters": { + "type": "and", + "filters": [ + {"type": "eq", "key": "product", "value": "openshift_container_platform"}, + {"type": "eq", "key": "version", "value": "4.21"} + ] + } + } + } + }' +``` + +**Nested compound filters:** + +```bash +curl -sX POST http://localhost:8080/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "query": "Troubleshooting guide", + "solr": { + "mode": "hybrid", + "filters": { + "filters": { + "type": "and", + "filters": [ + {"type": "eq", "key": "doc_type", "value": "guide"}, + { + "type": "or", + "filters": [ + {"type": "eq", "key": "product", "value": "openshift_container_platform"}, + {"type": "eq", "key": "product", "value": "ansible_automation_platform"} + ] + } + ] + } + } + } + }' +``` + +#### Filter Behavior + +- **Static filters preserved:** The configured `chunk_filter_query` (e.g., `"product:*openshift*"`) is always applied +- **Dynamic filters added:** Request filters are combined with static filters using AND logic +- **String escaping:** Special Solr characters in filter values are automatically escaped +- **Works with all search modes:** Filters apply to `semantic`, `hybrid`, and `lexical` search modes + ### Configure Lightspeed Stack for library mode For the simplest local development, configure `lightspeed-stack.yaml` to diff --git a/docs/openapi.json b/docs/openapi.json index f19d56a1a..8052a58d6 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -19379,12 +19379,39 @@ } ], "title": "Filters", - "description": "Solr provider filter payload passed through as params['solr'].", + "description": "Solr provider filter payload passed through as params['solr']. Supports structured metadata filters (eq, ne, in, nin comparison operators). Legacy filter-only objects (e.g. fq) are still accepted.", "examples": [ + { + "filters": { + "key": "product", + "type": "eq", + "value": "openshift_container_platform" + } + }, + { + "filters": { + "filters": [ + { + "key": "product", + "type": "eq", + "value": "openshift_container_platform" + }, + { + "key": "version", + "type": "in", + "value": [ + "4.14", + "4.15", + "4.16" + ] + } + ], + "type": "and" + } + }, { "fq": [ - "product:*openshift*", - "product_version:*4.16*" + "product:*openshift*" ] } ] diff --git a/src/models/common/query.py b/src/models/common/query.py index ceb450f39..508de4de1 100644 --- a/src/models/common/query.py +++ b/src/models/common/query.py @@ -82,8 +82,38 @@ class SolrVectorSearchRequest(BaseModel): ) filters: Optional[dict[str, Any]] = Field( None, - description="Solr provider filter payload passed through as params['solr'].", - examples=[{"fq": ["product:*openshift*", "product_version:*4.16*"]}], + description=( + "Solr provider filter payload passed through as params['solr']. " + "Supports structured metadata filters (eq, ne, in, nin comparison operators). " + "Legacy filter-only objects (e.g. fq) are still accepted." + ), + examples=[ + { + "filters": { + "type": "eq", + "key": "product", + "value": "openshift_container_platform", + } + }, + { + "filters": { + "type": "and", + "filters": [ + { + "type": "eq", + "key": "product", + "value": "openshift_container_platform", + }, + { + "type": "in", + "key": "version", + "value": ["4.14", "4.15", "4.16"], + }, + ], + } + }, + {"fq": ["product:*openshift*"]}, + ], ) @model_validator(mode="before") diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index 9d901271e..97f982767 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -61,9 +61,11 @@ def _build_query_params( Args: solr: Optional structured Solr request (mode and filters from the API). + - mode: Solr search mode (semantic, hybrid, lexical) + - filters: Solr filter payload, may contain structured metadata filters Returns: - Parameter dictionary for ``vector_io.query``. + Parameter dictionary for ``vector_io.query`` with extracted filters at top level. """ resolved_mode = ( solr.mode @@ -79,8 +81,23 @@ def _build_query_params( logger.debug("query_request.solr: %s", solr) if solr is not None and solr.filters is not None: - params["solr"] = solr.filters - logger.debug("Final params with solr filters: %s", params) + # Extract structured metadata filters if present in solr.filters dict + # Filters need to be at top-level params for vector_io.query + if isinstance(solr.filters, dict) and "filters" in solr.filters: + params["filters"] = solr.filters["filters"] + logger.debug("Extracted filters from solr.filters: %s", params["filters"]) + + # Pass remaining solr.filters content (legacy fq, etc.) to params["solr"] + remaining_filters = { + k: v for k, v in solr.filters.items() if k != "filters" + } + if remaining_filters: + params["solr"] = remaining_filters + logger.debug("Remaining solr.filters: %s", remaining_filters) + else: + # Legacy format: entire solr.filters dict is passed as params["solr"] + params["solr"] = solr.filters + logger.debug("Legacy solr.filters format: %s", params["solr"]) else: logger.debug("No solr filters provided") diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index 0945bb236..3e220e79b 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py @@ -67,14 +67,89 @@ def test_default_params(self) -> None: assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE assert "solr" not in params - def test_with_solr_filters(self) -> None: - """Test parameters when solr filters are provided.""" - solr = SolrVectorSearchRequest.model_validate({"filter": "value"}) + def test_with_legacy_solr_filters(self) -> None: + """Test parameters when legacy solr filters are provided.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "fq": ["platform:openshift"], + }, + }, + ) + params = _build_query_params(solr=solr) + + assert params["solr"] == {"fq": ["platform:openshift"]} + assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K + assert "filters" not in params + + def test_with_structured_metadata_filters(self) -> None: + """Test parameters with structured metadata filter format.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "eq", + "key": "platform", + "value": "openshift", + }, + }, + }, + ) + params = _build_query_params(solr=solr) + + # Filters should be extracted to top-level + assert "filters" in params + assert params["filters"]["type"] == "eq" + assert params["filters"]["key"] == "platform" + assert params["filters"]["value"] == "openshift" + assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K + # No remaining solr params + assert "solr" not in params + + def test_with_filters_and_other_solr_params(self) -> None: + """Test parameters with both filters and other solr-specific params.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "in", + "key": "version", + "value": ["4.14", "4.15"], + }, + "custom_param": "value", + }, + }, + ) params = _build_query_params(solr=solr) - assert params["solr"] == {"filter": "value"} + # Filters extracted to top-level + assert params["filters"]["type"] == "in" + assert params["filters"]["key"] == "version" + # Other params remain under solr key + assert params["solr"] == {"custom_param": "value"} assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K + def test_with_compound_filter(self) -> None: + """Test parameters with compound AND filter.""" + solr = SolrVectorSearchRequest.model_validate( + { + "filters": { + "filters": { + "type": "and", + "filters": [ + {"type": "eq", "key": "platform", "value": "openshift"}, + {"type": "ne", "key": "status", "value": "archived"}, + ], + }, + }, + }, + ) + params = _build_query_params(solr=solr) + + assert params["filters"]["type"] == "and" + assert len(params["filters"]["filters"]) == 2 + assert "solr" not in params + def test_custom_mode(self) -> None: """Request mode overrides the default Solr vector_io mode.""" solr = SolrVectorSearchRequest(mode="lexical") @@ -86,7 +161,8 @@ def test_custom_mode(self) -> None: def test_mode_with_solr_filters(self) -> None: """Custom mode is combined with solr filter payload.""" solr = SolrVectorSearchRequest( - mode="semantic", filters={"fq": ["product:*openshift*"]} + mode="semantic", + filters={"fq": ["product:*openshift*"]}, ) params = _build_query_params(solr=solr) @@ -95,7 +171,9 @@ def test_mode_with_solr_filters(self) -> None: def test_mode_with_only_filters(self) -> None: """Mode is set to default value when only filters are provided.""" - solr = SolrVectorSearchRequest(filters={"fq": ["product:*openshift*"]}) + solr = SolrVectorSearchRequest( + filters={"fq": ["product:*openshift*"]}, + ) params = _build_query_params(solr=solr) assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE