From ecd0f41c284e572c4fd4cc44d959ac5fd0151d6a Mon Sep 17 00:00:00 2001 From: Urmzd Mukhammadnaim Date: Tue, 17 Mar 2026 00:03:02 -0500 Subject: [PATCH 1/5] fix: allow numeric resource IDs in _VALID_RESOURCE_NAME_REGEX The regex required the first character to be a lowercase letter [a-z], which rejected bare numeric IDs (e.g. "1234567890") that the API assigns to resources like RAG corpora and files. Updated to accept any alphanumeric first character [a-zA-Z0-9]. Fixes all three definitions of _VALID_RESOURCE_NAME_REGEX: - vertexai/preview/rag/utils/_gapic_utils.py - vertexai/rag/utils/_gapic_utils.py - google/cloud/aiplatform/vertex_ray/util/_validation_utils.py --- google/cloud/aiplatform/vertex_ray/util/_validation_utils.py | 2 +- vertexai/preview/rag/utils/_gapic_utils.py | 2 +- vertexai/rag/utils/_gapic_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py b/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py index 5c2a833aa7..12cdeda3f8 100644 --- a/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py +++ b/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py @@ -50,7 +50,7 @@ _DEFAULT_REGION = "us" _PERSISTENT_RESOURCE_NAME_PATTERN = "projects/{}/locations/{}/persistentResources/{}" -_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}" +_VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" _DASHBOARD_URI_SUFFIX = "aiplatform-training.googleusercontent.com" diff --git a/vertexai/preview/rag/utils/_gapic_utils.py b/vertexai/preview/rag/utils/_gapic_utils.py index 4150111c60..66fe004492 100644 --- a/vertexai/preview/rag/utils/_gapic_utils.py +++ b/vertexai/preview/rag/utils/_gapic_utils.py @@ -78,7 +78,7 @@ ) -_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}" +_VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" _VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX = ( r"projects/[^/]+/locations/[^/]+/processors/[^/]+(?:/processorVersions/[^/]+)?" ) diff --git a/vertexai/rag/utils/_gapic_utils.py b/vertexai/rag/utils/_gapic_utils.py index 3ee39a7a0f..0104df41f8 100644 --- a/vertexai/rag/utils/_gapic_utils.py +++ b/vertexai/rag/utils/_gapic_utils.py @@ -67,7 +67,7 @@ ) -_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}" +_VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" _VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX = ( r"projects/[^/]+/locations/[^/]+/processors/[^/]+(?:/processorVersions/[^/]+)?" ) From 9108671597d7d38182cf8b93704a125bb551dd44 Mon Sep 17 00:00:00 2001 From: Urmzd Mukhammadnaim Date: Tue, 17 Mar 2026 00:11:08 -0500 Subject: [PATCH 2/5] test: add test cases for bare numeric resource IDs Adds tests for get_corpus and get_file with numeric IDs to verify the regex fix accepts API-assigned numeric resource identifiers. --- tests/unit/vertex_rag/test_rag_constants.py | 2 ++ .../unit/vertex_rag/test_rag_constants_preview.py | 2 ++ tests/unit/vertex_rag/test_rag_data.py | 13 +++++++++++++ tests/unit/vertex_rag/test_rag_data_preview.py | 15 +++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/tests/unit/vertex_rag/test_rag_constants.py b/tests/unit/vertex_rag/test_rag_constants.py index 65459cec2a..19fa4d3970 100644 --- a/tests/unit/vertex_rag/test_rag_constants.py +++ b/tests/unit/vertex_rag/test_rag_constants.py @@ -82,6 +82,7 @@ TEST_CORPUS_DISPLAY_NAME = "my-corpus-1" TEST_CORPUS_DISCRIPTION = "My first corpus." TEST_RAG_CORPUS_ID = "generate-123" +TEST_RAG_CORPUS_NUMERIC_ID = "1234567890" TEST_API_ENDPOINT = "us-central1-" + aiplatform.constants.base.API_BASE_PATH TEST_RAG_CORPUS_RESOURCE_NAME = f"projects/{TEST_PROJECT_NUMBER}/locations/{TEST_REGION}/ragCorpora/{TEST_RAG_CORPUS_ID}" @@ -244,6 +245,7 @@ TEST_API_ENDPOINT, TEST_PROJECT_NUMBER, TEST_REGION, TEST_RAG_CORPUS_ID ) TEST_RAG_FILE_ID = "generate-456" +TEST_RAG_FILE_NUMERIC_ID = "9876543210" TEST_RAG_FILE_RESOURCE_NAME = ( TEST_RAG_CORPUS_RESOURCE_NAME + f"/ragFiles/{TEST_RAG_FILE_ID}" ) diff --git a/tests/unit/vertex_rag/test_rag_constants_preview.py b/tests/unit/vertex_rag/test_rag_constants_preview.py index 0c0f3c810c..e41950b1a5 100644 --- a/tests/unit/vertex_rag/test_rag_constants_preview.py +++ b/tests/unit/vertex_rag/test_rag_constants_preview.py @@ -91,6 +91,7 @@ TEST_CORPUS_DISPLAY_NAME = "my-corpus-1" TEST_CORPUS_DISCRIPTION = "My first corpus." TEST_RAG_CORPUS_ID = "generate-123" +TEST_RAG_CORPUS_NUMERIC_ID = "1234567890" TEST_API_ENDPOINT = "us-central1-" + aiplatform.constants.base.API_BASE_PATH TEST_RAG_CORPUS_RESOURCE_NAME = f"projects/{TEST_PROJECT_NUMBER}/locations/{TEST_REGION}/ragCorpora/{TEST_RAG_CORPUS_ID}" @@ -489,6 +490,7 @@ TEST_API_ENDPOINT, TEST_PROJECT_NUMBER, TEST_REGION, TEST_RAG_CORPUS_ID ) TEST_RAG_FILE_ID = "generate-456" +TEST_RAG_FILE_NUMERIC_ID = "9876543210" TEST_RAG_FILE_RESOURCE_NAME = ( TEST_RAG_CORPUS_RESOURCE_NAME + f"/ragFiles/{TEST_RAG_FILE_ID}" ) diff --git a/tests/unit/vertex_rag/test_rag_data.py b/tests/unit/vertex_rag/test_rag_data.py index 94f6c35bf9..1b6cd7b7c3 100644 --- a/tests/unit/vertex_rag/test_rag_data.py +++ b/tests/unit/vertex_rag/test_rag_data.py @@ -663,6 +663,11 @@ def test_get_corpus_id_success(self): rag_corpus = rag.get_corpus(test_rag_constants.TEST_RAG_CORPUS_ID) rag_corpus_eq(rag_corpus, test_rag_constants.TEST_RAG_CORPUS) + @pytest.mark.usefixtures("rag_data_client_mock") + def test_get_corpus_numeric_id_success(self): + rag_corpus = rag.get_corpus(test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID) + rag_corpus_eq(rag_corpus, test_rag_constants.TEST_RAG_CORPUS) + @pytest.mark.usefixtures("rag_data_client_mock_exception") def test_get_corpus_failure(self): with pytest.raises(RuntimeError) as e: @@ -883,6 +888,14 @@ def test_get_file_id_success(self): ) rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE) + @pytest.mark.usefixtures("rag_data_client_mock") + def test_get_file_numeric_id_success(self): + rag_file = rag.get_file( + name=test_rag_constants.TEST_RAG_FILE_NUMERIC_ID, + corpus_name=test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID, + ) + rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE) + @pytest.mark.usefixtures("rag_data_client_mock_exception") def test_get_file_failure(self): with pytest.raises(RuntimeError) as e: diff --git a/tests/unit/vertex_rag/test_rag_data_preview.py b/tests/unit/vertex_rag/test_rag_data_preview.py index b1e7d4c3b0..9ac9b66aab 100644 --- a/tests/unit/vertex_rag/test_rag_data_preview.py +++ b/tests/unit/vertex_rag/test_rag_data_preview.py @@ -1315,6 +1315,13 @@ def test_get_corpus_id_success(self): rag_corpus = rag.get_corpus(test_rag_constants_preview.TEST_RAG_CORPUS_ID) rag_corpus_eq(rag_corpus, test_rag_constants_preview.TEST_RAG_CORPUS) + @pytest.mark.usefixtures("rag_data_client_preview_mock") + def test_get_corpus_numeric_id_success(self): + rag_corpus = rag.get_corpus( + test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID + ) + rag_corpus_eq(rag_corpus, test_rag_constants_preview.TEST_RAG_CORPUS) + @pytest.mark.usefixtures("rag_data_client_preview_mock_exception") def test_get_corpus_failure(self): with pytest.raises(RuntimeError) as e: @@ -1454,6 +1461,14 @@ def test_get_file_id_success(self): ) rag_file_eq(rag_file, test_rag_constants_preview.TEST_RAG_FILE) + @pytest.mark.usefixtures("rag_data_client_preview_mock") + def test_get_file_numeric_id_success(self): + rag_file = rag.get_file( + name=test_rag_constants_preview.TEST_RAG_FILE_NUMERIC_ID, + corpus_name=test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID, + ) + rag_file_eq(rag_file, test_rag_constants_preview.TEST_RAG_FILE) + @pytest.mark.usefixtures("rag_data_client_preview_mock_exception") def test_get_file_failure(self): with pytest.raises(RuntimeError) as e: From 37d4ed6f8a268b33137da122a867450915dde36c Mon Sep 17 00:00:00 2001 From: Urmzd Mukhammadnaim Date: Tue, 17 Mar 2026 00:45:50 -0500 Subject: [PATCH 3/5] revert: remove unrelated vertex_ray regex change The vertex_ray _VALID_RESOURCE_NAME_REGEX intentionally requires a lowercase letter first for persistent resource names, which is a different context from RAG resource IDs. --- google/cloud/aiplatform/vertex_ray/util/_validation_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py b/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py index 12cdeda3f8..5c2a833aa7 100644 --- a/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py +++ b/google/cloud/aiplatform/vertex_ray/util/_validation_utils.py @@ -50,7 +50,7 @@ _DEFAULT_REGION = "us" _PERSISTENT_RESOURCE_NAME_PATTERN = "projects/{}/locations/{}/persistentResources/{}" -_VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" +_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}" _DASHBOARD_URI_SUFFIX = "aiplatform-training.googleusercontent.com" From 40f4dc2d02b593b935dbcb984d9de8d0f490c8e7 Mon Sep 17 00:00:00 2001 From: Urmzd Mukhammadnaim Date: Tue, 17 Mar 2026 00:56:44 -0500 Subject: [PATCH 4/5] docs: add inline comment explaining numeric ID support in regex --- vertexai/preview/rag/utils/_gapic_utils.py | 1 + vertexai/rag/utils/_gapic_utils.py | 1 + 2 files changed, 2 insertions(+) diff --git a/vertexai/preview/rag/utils/_gapic_utils.py b/vertexai/preview/rag/utils/_gapic_utils.py index 66fe004492..5bcb1e9a8e 100644 --- a/vertexai/preview/rag/utils/_gapic_utils.py +++ b/vertexai/preview/rag/utils/_gapic_utils.py @@ -78,6 +78,7 @@ ) +# Allows numeric resource IDs (e.g. "1234567890") as bare names. _VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" _VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX = ( r"projects/[^/]+/locations/[^/]+/processors/[^/]+(?:/processorVersions/[^/]+)?" diff --git a/vertexai/rag/utils/_gapic_utils.py b/vertexai/rag/utils/_gapic_utils.py index 0104df41f8..6535767b71 100644 --- a/vertexai/rag/utils/_gapic_utils.py +++ b/vertexai/rag/utils/_gapic_utils.py @@ -67,6 +67,7 @@ ) +# Allows numeric resource IDs (e.g. "1234567890") as bare names. _VALID_RESOURCE_NAME_REGEX = "[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}" _VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX = ( r"projects/[^/]+/locations/[^/]+/processors/[^/]+(?:/processorVersions/[^/]+)?" From a71df75ac7db7b5232af0d337956df894c34d507 Mon Sep 17 00:00:00 2001 From: Urmzd Mukhammadnaim Date: Tue, 17 Mar 2026 01:35:48 -0500 Subject: [PATCH 5/5] test: use isolated mocks in numeric ID tests to exercise regex code path --- tests/unit/vertex_rag/test_rag_data.py | 54 ++++++++++++++--- .../unit/vertex_rag/test_rag_data_preview.py | 58 +++++++++++++++---- 2 files changed, 92 insertions(+), 20 deletions(-) diff --git a/tests/unit/vertex_rag/test_rag_data.py b/tests/unit/vertex_rag/test_rag_data.py index 1b6cd7b7c3..adc67c70c2 100644 --- a/tests/unit/vertex_rag/test_rag_data.py +++ b/tests/unit/vertex_rag/test_rag_data.py @@ -663,10 +663,25 @@ def test_get_corpus_id_success(self): rag_corpus = rag.get_corpus(test_rag_constants.TEST_RAG_CORPUS_ID) rag_corpus_eq(rag_corpus, test_rag_constants.TEST_RAG_CORPUS) - @pytest.mark.usefixtures("rag_data_client_mock") def test_get_corpus_numeric_id_success(self): - rag_corpus = rag.get_corpus(test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID) - rag_corpus_eq(rag_corpus, test_rag_constants.TEST_RAG_CORPUS) + """Bare numeric IDs must pass the regex and be expanded to full resource names.""" + with mock.patch.object( + rag.utils._gapic_utils, "create_rag_data_service_client" + ) as mock_client_factory: + api_client_mock = mock.Mock(spec=VertexRagDataServiceClient) + api_client_mock.parse_rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_corpus_path + ) + api_client_mock.rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.rag_corpus_path + ) + api_client_mock.get_rag_corpus.return_value = ( + test_rag_constants.TEST_GAPIC_RAG_CORPUS + ) + mock_client_factory.return_value = api_client_mock + + rag_corpus = rag.get_corpus(test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID) + rag_corpus_eq(rag_corpus, test_rag_constants.TEST_RAG_CORPUS) @pytest.mark.usefixtures("rag_data_client_mock_exception") def test_get_corpus_failure(self): @@ -888,13 +903,34 @@ def test_get_file_id_success(self): ) rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE) - @pytest.mark.usefixtures("rag_data_client_mock") def test_get_file_numeric_id_success(self): - rag_file = rag.get_file( - name=test_rag_constants.TEST_RAG_FILE_NUMERIC_ID, - corpus_name=test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID, - ) - rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE) + """Bare numeric IDs must pass the regex and be expanded to full resource names.""" + with mock.patch.object( + rag.utils._gapic_utils, "create_rag_data_service_client" + ) as mock_client_factory: + api_client_mock = mock.Mock(spec=VertexRagDataServiceClient) + api_client_mock.parse_rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_corpus_path + ) + api_client_mock.parse_rag_file_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_file_path + ) + api_client_mock.rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.rag_corpus_path + ) + api_client_mock.rag_file_path.side_effect = ( + VertexRagDataServiceClient.rag_file_path + ) + api_client_mock.get_rag_file.return_value = ( + test_rag_constants.TEST_GAPIC_RAG_FILE + ) + mock_client_factory.return_value = api_client_mock + + rag_file = rag.get_file( + name=test_rag_constants.TEST_RAG_FILE_NUMERIC_ID, + corpus_name=test_rag_constants.TEST_RAG_CORPUS_NUMERIC_ID, + ) + rag_file_eq(rag_file, test_rag_constants.TEST_RAG_FILE) @pytest.mark.usefixtures("rag_data_client_mock_exception") def test_get_file_failure(self): diff --git a/tests/unit/vertex_rag/test_rag_data_preview.py b/tests/unit/vertex_rag/test_rag_data_preview.py index 9ac9b66aab..2acbdb87ff 100644 --- a/tests/unit/vertex_rag/test_rag_data_preview.py +++ b/tests/unit/vertex_rag/test_rag_data_preview.py @@ -1315,12 +1315,27 @@ def test_get_corpus_id_success(self): rag_corpus = rag.get_corpus(test_rag_constants_preview.TEST_RAG_CORPUS_ID) rag_corpus_eq(rag_corpus, test_rag_constants_preview.TEST_RAG_CORPUS) - @pytest.mark.usefixtures("rag_data_client_preview_mock") def test_get_corpus_numeric_id_success(self): - rag_corpus = rag.get_corpus( - test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID - ) - rag_corpus_eq(rag_corpus, test_rag_constants_preview.TEST_RAG_CORPUS) + """Bare numeric IDs must pass the regex and be expanded to full resource names.""" + with mock.patch.object( + rag.utils._gapic_utils, "create_rag_data_service_client" + ) as mock_client_factory: + api_client_mock = mock.Mock(spec=VertexRagDataServiceClient) + api_client_mock.parse_rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_corpus_path + ) + api_client_mock.rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.rag_corpus_path + ) + api_client_mock.get_rag_corpus.return_value = ( + test_rag_constants_preview.TEST_GAPIC_RAG_CORPUS + ) + mock_client_factory.return_value = api_client_mock + + rag_corpus = rag.get_corpus( + test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID + ) + rag_corpus_eq(rag_corpus, test_rag_constants_preview.TEST_RAG_CORPUS) @pytest.mark.usefixtures("rag_data_client_preview_mock_exception") def test_get_corpus_failure(self): @@ -1461,13 +1476,34 @@ def test_get_file_id_success(self): ) rag_file_eq(rag_file, test_rag_constants_preview.TEST_RAG_FILE) - @pytest.mark.usefixtures("rag_data_client_preview_mock") def test_get_file_numeric_id_success(self): - rag_file = rag.get_file( - name=test_rag_constants_preview.TEST_RAG_FILE_NUMERIC_ID, - corpus_name=test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID, - ) - rag_file_eq(rag_file, test_rag_constants_preview.TEST_RAG_FILE) + """Bare numeric IDs must pass the regex and be expanded to full resource names.""" + with mock.patch.object( + rag.utils._gapic_utils, "create_rag_data_service_client" + ) as mock_client_factory: + api_client_mock = mock.Mock(spec=VertexRagDataServiceClient) + api_client_mock.parse_rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_corpus_path + ) + api_client_mock.parse_rag_file_path.side_effect = ( + VertexRagDataServiceClient.parse_rag_file_path + ) + api_client_mock.rag_corpus_path.side_effect = ( + VertexRagDataServiceClient.rag_corpus_path + ) + api_client_mock.rag_file_path.side_effect = ( + VertexRagDataServiceClient.rag_file_path + ) + api_client_mock.get_rag_file.return_value = ( + test_rag_constants_preview.TEST_GAPIC_RAG_FILE + ) + mock_client_factory.return_value = api_client_mock + + rag_file = rag.get_file( + name=test_rag_constants_preview.TEST_RAG_FILE_NUMERIC_ID, + corpus_name=test_rag_constants_preview.TEST_RAG_CORPUS_NUMERIC_ID, + ) + rag_file_eq(rag_file, test_rag_constants_preview.TEST_RAG_FILE) @pytest.mark.usefixtures("rag_data_client_preview_mock_exception") def test_get_file_failure(self):