diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index fd8628bf12..33521f0c8f 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -65,7 +65,7 @@ ), ) INFERENCE_CONFIG = types.EvaluationRunInferenceConfig( - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" + model="projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash" ) TOOL = genai_types.Tool( function_declarations=[ @@ -82,8 +82,14 @@ AGENT_INFO = types.evals.AgentInfo( agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456", name="agent-1", - instruction="agent-1 instruction", - tool_declarations=[TOOL], + agents={ + "agent-1": types.evals.AgentConfig( + agent_id="agent-1", + instruction="agent-1 instruction", + tools=[TOOL], + ) + }, + root_agent_id="agent-1", ) DEFAULT_PROMPT_TEMPLATE = "{prompt}" INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame( @@ -96,9 +102,9 @@ } ) CANDIDATE_NAME = "candidate_1" -MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" +MODEL_NAME = "projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash" EVAL_SET_NAME = ( - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" + "projects/977012026409/locations/us-central1/evaluationSets/6619939608513740800" ) @@ -140,12 +146,7 @@ def test_create_eval_run_data_source_evaluation_set(client): assert evaluation_run.inference_configs[ AGENT_INFO.name ] == types.EvaluationRunInferenceConfig( - agent_config=types.EvaluationRunAgentConfig( - developer_instruction=genai_types.Content( - parts=[genai_types.Part(text="agent-1 instruction")] - ), - tools=[TOOL], - ) + agent_configs=AGENT_INFO.agents, ) assert evaluation_run.labels == { "vertex-ai-evaluation-agent-engine-id": "456", diff --git a/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py b/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py index afa54271f8..955ea690cc 100644 --- a/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py +++ b/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py @@ -22,24 +22,27 @@ def test_gen_user_scenarios(client): """Tests that generate_user_scenarios() correctly calls the API and parses the response.""" eval_dataset = client.evals.generate_user_scenarios( - agents={ - "booking-agent": types.evals.AgentConfig( - agent_id="booking-agent", - agent_type="service_agent", - description="An agent capable of booking flights and hotels.", - instruction="You are a helpful travel assistant. Use tools to find flights.", - tools=[ - { - "function_declarations": [ - { - "name": "search_flights", - "description": "Search for available flights.", - } - ] - } - ], - ) - }, + agent_info=types.evals.AgentInfo( + agents={ + "booking-agent": types.evals.AgentConfig( + agent_id="booking-agent", + agent_type="service_agent", + description="An agent capable of booking flights and hotels.", + instruction="You are a helpful travel assistant. Use tools to find flights.", + tools=[ + { + "function_declarations": [ + { + "name": "search_flights", + "description": "Search for available flights.", + } + ] + } + ], + ) + }, + root_agent_id="booking-agent", + ), user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( user_scenario_count=2, simulation_instruction=( @@ -49,18 +52,11 @@ def test_gen_user_scenarios(client): environment_data="Today is Monday. Flights to Paris are available.", model_name="gemini-2.5-flash", ), - root_agent_id="booking-agent", ) assert isinstance(eval_dataset, types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 - assert ( - eval_dataset.eval_cases[0].user_scenario.starting_prompt - == "I want to find a flight from New York to London." - ) - assert ( - eval_dataset.eval_cases[0].user_scenario.conversation_plan - == "Actually, I meant Paris, not London. Please search for flights to Paris." - ) + assert eval_dataset.eval_cases[0].user_scenario.starting_prompt + assert eval_dataset.eval_cases[0].user_scenario.conversation_plan pytest_plugins = ("pytest_asyncio",) @@ -70,24 +66,27 @@ def test_gen_user_scenarios(client): async def test_gen_user_scenarios_async(client): """Tests that generate_user_scenarios() async correctly calls the API and parses the response.""" eval_dataset = await client.aio.evals.generate_user_scenarios( - agents={ - "booking-agent": types.evals.AgentConfig( - agent_id="booking-agent", - agent_type="service_agent", - description="An agent capable of booking flights and hotels.", - instruction="You are a helpful travel assistant. Use tools to find flights.", - tools=[ - { - "function_declarations": [ - { - "name": "search_flights", - "description": "Search for available flights.", - } - ] - } - ], - ) - }, + agent_info=types.evals.AgentInfo( + agents={ + "booking-agent": types.evals.AgentConfig( + agent_id="booking-agent", + agent_type="service_agent", + description="An agent capable of booking flights and hotels.", + instruction="You are a helpful travel assistant. Use tools to find flights.", + tools=[ + { + "function_declarations": [ + { + "name": "search_flights", + "description": "Search for available flights.", + } + ] + } + ], + ) + }, + root_agent_id="booking-agent", + ), user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( user_scenario_count=2, simulation_instruction=( @@ -97,18 +96,11 @@ async def test_gen_user_scenarios_async(client): environment_data="Today is Monday. Flights to Paris are available.", model_name="gemini-2.5-flash", ), - root_agent_id="booking-agent", ) assert isinstance(eval_dataset, types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 - assert ( - eval_dataset.eval_cases[1].user_scenario.starting_prompt - == "Find me a flight from Boston to Rome for next month." - ) - assert ( - eval_dataset.eval_cases[1].user_scenario.conversation_plan - == "Wait, change of plans. I need to go to Milan instead, and it needs to be a round trip, returning two weeks after departure." - ) + assert eval_dataset.eval_cases[1].user_scenario.starting_prompt + assert eval_dataset.eval_cases[1].user_scenario.conversation_plan pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py index 6d07a52178..56b52ba650 100644 --- a/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_get_evaluation_run.py @@ -16,7 +16,6 @@ from tests.unit.vertexai.genai.replays import pytest_helper from vertexai import types -from google.genai import types as genai_types import datetime import pytest @@ -25,13 +24,13 @@ def test_get_eval_run(client): """Tests that get_evaluation_run() returns a correctly structured EvaluationRun.""" client._api_client._http_options.api_version = "v1beta1" evaluation_run_name = ( - "projects/503583131166/locations/us-central1/evaluationRuns/5133048044039700480" + "projects/977012026409/locations/us-central1/evaluationRuns/3940878372367761408" ) evaluation_run = client.evals.get_evaluation_run( name=evaluation_run_name, include_evaluation_items=True ) - check_run_5133048044039700480(client, evaluation_run, evaluation_run_name) - check_run_5133048044039700480_evaluation_item_results( + check_run_3940878372367761408(client, evaluation_run, evaluation_run_name) + check_run_3940878372367761408_evaluation_item_results( client, evaluation_run, evaluation_run_name ) @@ -40,10 +39,10 @@ def test_get_eval_run_include_evaluation_items_false(client): """Tests that get_evaluation_run() returns a correctly structured EvaluationRun.""" client._api_client._http_options.api_version = "v1beta1" evaluation_run_name = ( - "projects/503583131166/locations/us-central1/evaluationRuns/5133048044039700480" + "projects/977012026409/locations/us-central1/evaluationRuns/3940878372367761408" ) evaluation_run = client.evals.get_evaluation_run(name=evaluation_run_name) - check_run_5133048044039700480(client, evaluation_run, evaluation_run_name) + check_run_3940878372367761408(client, evaluation_run, evaluation_run_name) assert evaluation_run.evaluation_item_results is None @@ -103,172 +102,58 @@ def test_get_eval_run_eval_set_source(client): async def test_get_eval_run_async(client): """Tests that get_evaluation_run() returns a correctly structured EvaluationRun.""" client._api_client._http_options.api_version = "v1beta1" - eval_run_id = "5133048044039700480" + eval_run_id = "3940878372367761408" evaluation_run_name = ( - f"projects/503583131166/locations/us-central1/evaluationRuns/{eval_run_id}" + f"projects/977012026409/locations/us-central1/evaluationRuns/{eval_run_id}" ) evaluation_run = await client.aio.evals.get_evaluation_run(name=eval_run_id) - check_run_5133048044039700480(client, evaluation_run, evaluation_run_name) + check_run_3940878372367761408(client, evaluation_run, evaluation_run_name) assert evaluation_run.evaluation_item_results is None -def check_run_5133048044039700480( +def check_run_3940878372367761408( client, evaluation_run: types.EvaluationRun, evaluation_run_name: str ): assert isinstance(evaluation_run, types.EvaluationRun) assert evaluation_run.name == evaluation_run_name - assert evaluation_run.display_name == "sdk-test-1" - assert evaluation_run.metadata == {"pipeline_id": "4868043098678099968"} + assert ( + evaluation_run.display_name + == "evaluation_run_9a464a39-6d40-4d4e-a5e2-a4ceabea4b15" + ) + assert evaluation_run.metadata == {"pipeline_id": "8162140658019074048"} assert evaluation_run.create_time == datetime.datetime( - 2025, 10, 21, 19, 25, 58, 669441, tzinfo=datetime.timezone.utc + 2026, 3, 18, 1, 10, 13, 360535, tzinfo=datetime.timezone.utc ) assert evaluation_run.completion_time == datetime.datetime( - 2025, 10, 21, 19, 26, 15, 855568, tzinfo=datetime.timezone.utc + 2026, 3, 18, 1, 11, 0, 448191, tzinfo=datetime.timezone.utc ) assert evaluation_run.state == types.EvaluationRunState.SUCCEEDED assert evaluation_run.evaluation_set_snapshot == ( - "projects/503583131166/locations/us-central1/evaluationSets/3122155626046685184" + "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" ) assert ( evaluation_run.data_source.evaluation_set - == "projects/503583131166/locations/us-central1/evaluationSets/3122155626046685184" + == "projects/977012026409/locations/us-central1/evaluationSets/3991900109943078912" ) assert evaluation_run.evaluation_run_results.evaluation_set == ( - "projects/503583131166/locations/us-central1/evaluationSets/129513673658990592" - ) - assert evaluation_run.inference_configs == { - "gemini-2.0-flash-001@default": types.EvaluationRunInferenceConfig( - agent_config=types.EvaluationRunAgentConfig( - developer_instruction={ - "parts": [{"text": "example agent developer instruction"}] - }, - tools=[ - genai_types.Tool( - function_declarations=[ - genai_types.FunctionDeclaration( - name="check_chime", - description="Check chime.", - parameters={ - "type": "OBJECT", - "properties": { - "nums": { - "type": "STRING", - "description": "List of numbers to be verified.", - } - }, - "required": ["nums"], - }, - ), - ], - ) - ], - ) - ), - } - assert evaluation_run.evaluation_run_results.summary_metrics == ( - types.SummaryMetric( - metrics={ - "gemini-2.0-flash-001@default/safety_v1/VARIANCE": 0.08950617055834077, - "gemini-2.0-flash-001@default/safety_v1/MAXIMUM": 1, - "gemini-2.0-flash-001@default/universal/AVERAGE": 0.7888888915379842, - "gemini-2.0-flash-001@default/universal/P90": 1, - "gemini-2.0-flash-001@default/safety_v1/MEDIAN": 1, - "gemini-2.0-flash-001@default/universal/P95": 1, - "gemini-2.0-flash-001@default/universal/VARIANCE": 0.08950617055834077, - "gemini-2.0-flash-001@default/universal/STANDARD_DEVIATION": 0.2991758188061675, - "gemini-2.0-flash-001@default/universal/MEDIAN": 1, - "gemini-2.0-flash-001@default/safety_v1/STANDARD_DEVIATION": 0.2991758188061675, - "gemini-2.0-flash-001@default/universal/MODE": 1, - "gemini-2.0-flash-001@default/safety_v1/MODE": 1, - "gemini-2.0-flash-001@default/safety_v1/MINIMUM": 0.3333333432674408, - "gemini-2.0-flash-001@default/safety_v1/P90": 1, - "gemini-2.0-flash-001@default/safety_v1/P95": 1, - "gemini-2.0-flash-001@default/universal/P99": 1, - "gemini-2.0-flash-001@default/safety_v1/AVERAGE": 0.7888888915379842, - "gemini-2.0-flash-001@default/universal/MINIMUM": 0.3333333432674408, - "gemini-2.0-flash-001@default/universal/MAXIMUM": 1, - "gemini-2.0-flash-001@default/safety_v1/P99": 1, - }, - total_items=3, - ) + "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" ) + assert evaluation_run.evaluation_run_results.summary_metrics.total_items == 2 assert evaluation_run.error is None -def check_run_5133048044039700480_evaluation_item_results( +def check_run_3940878372367761408_evaluation_item_results( client, evaluation_run: types.EvaluationRun, evaluation_run_name: str ): eval_result = evaluation_run.evaluation_item_results assert isinstance(eval_result, types.EvaluationResult) assert eval_result.summary_metrics == [ types.AggregatedMetricResult( - metric_name="safety_v1", - mean_score=0.7888888915379842, - stdev_score=0.2991758188061675, - ), - types.AggregatedMetricResult( - metric_name="universal", - mean_score=0.7888888915379842, - stdev_score=0.2991758188061675, + metric_name="general_quality_v1", + mean_score=0.13333333656191826, + stdev_score=0.03333333507180214, ), ] - # Check the agent info. - assert eval_result.agent_info == types.evals.AgentInfo( - name="gemini-2.0-flash-001@default", - instruction="example agent developer instruction", - description=None, - tool_declarations=[ - genai_types.Tool( - function_declarations=[ - genai_types.FunctionDeclaration( - name="check_chime", - description="Check chime.", - parameters={ - "type": "OBJECT", - "properties": { - "nums": { - "type": "STRING", - "description": "List of numbers to be verified.", - } - }, - "required": ["nums"], - }, - ), - ], - ) - ], - ) - # Check the first eval case result. - eval_case_result = eval_result.eval_case_results[0] - assert isinstance(eval_case_result, types.EvalCaseResult) - # Check the response candidate results. - response_candidate_result = eval_case_result.response_candidate_results[0] - assert response_candidate_result.response_index == 0 - universal_metric_result = response_candidate_result.metric_results["universal"] - assert isinstance(universal_metric_result, types.EvalCaseMetricResult) - assert universal_metric_result.metric_name == "universal" - assert universal_metric_result.score > 0 - assert universal_metric_result.explanation is None - # Check the first rubric verdict. - rubric_verdict_0 = universal_metric_result.rubric_verdicts[0] - assert isinstance(rubric_verdict_0, types.evals.RubricVerdict) - assert rubric_verdict_0.evaluated_rubric == types.evals.Rubric( - content=types.evals.RubricContent( - property=types.evals.RubricContentProperty( - description="The response is in English." - ) - ), - importance="HIGH", - type="LANGUAGE:PRIMARY_RESPONSE_LANGUAGE", - ) - assert rubric_verdict_0.reasoning is not None - assert rubric_verdict_0.verdict is True - # Check the first evaluation dataset. - eval_dataset = eval_result.evaluation_dataset[0] - assert isinstance(eval_dataset, types.EvaluationDataset) - assert eval_dataset.candidate_name == "gemini-2.0-flash-001@default" - assert eval_dataset.eval_dataset_df.shape[0] == 3 - assert eval_dataset.eval_dataset_df.shape[1] > 3 pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 7ec5d8705e..c9fd9ed635 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -256,7 +256,10 @@ def test_eval_evaluate_with_agent_info(self, mock_execute_evaluation): dataset = vertexai_genai_types.EvaluationDataset( eval_dataset_df=pd.DataFrame([{"prompt": "p1", "response": "r1"}]) ) - agent_info = {"agent1": {"name": "agent1", "instruction": "instruction1"}} + agent_info = { + "name": "agent_system", + "agents": {"agent1": {"agent_id": "agent1", "instruction": "instruction1"}}, + } self.client.evals.evaluate( dataset=dataset, metrics=[vertexai_genai_types.Metric(name="exact_match")], @@ -1624,11 +1627,11 @@ def run_async_side_effect(*args, **kwargs): "agents": { "mock_agent": { "agent_id": "mock_agent", - "agent_resource_name": None, "agent_type": "Mock", "instruction": "mock instruction", "description": "mock description", "tools": [], + "sub_agents": [], } }, "turns": [ @@ -1656,11 +1659,11 @@ def run_async_side_effect(*args, **kwargs): "agents": { "mock_agent": { "agent_id": "mock_agent", - "agent_resource_name": None, "agent_type": "Mock", "instruction": "mock instruction", "description": "mock description", "tools": [], + "sub_agents": [], } }, "turns": [ @@ -3466,15 +3469,21 @@ def test_agent_info_creation(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - description="description1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + description="description1", + tools=[tool], + ) + }, ) - assert agent_info.name == "agent1" - assert agent_info.instruction == "instruction1" - assert agent_info.description == "description1" - assert agent_info.tool_declarations == [tool] + assert agent_info.name == "agent_system" + assert "agent1" in agent_info.agents + assert agent_info.agents["agent1"].instruction == "instruction1" + assert agent_info.agents["agent1"].description == "description1" + assert agent_info.agents["agent1"].tools == [tool] @mock.patch.object(genai_types.FunctionDeclaration, "from_callable_with_api_option") def test_load_from_agent(self, mock_from_callable): @@ -3490,6 +3499,7 @@ def my_search_tool(query: str) -> str: mock_agent.instruction = "mock instruction" mock_agent.description = "mock description" mock_agent.tools = [my_search_tool] + mock_agent.sub_agents = [] agent_info = vertexai_genai_types.evals.AgentInfo.load_from_agent( agent=mock_agent, @@ -3497,15 +3507,15 @@ def my_search_tool(query: str) -> str: ) assert agent_info.name == "mock_agent" - assert agent_info.instruction == "mock instruction" - assert agent_info.description == "mock description" + assert agent_info.agents["mock_agent"].instruction == "mock instruction" + assert agent_info.agents["mock_agent"].description == "mock description" assert ( agent_info.agent_resource_name == "projects/123/locations/abc/reasoningEngines/456" ) - assert len(agent_info.tool_declarations) == 1 - assert isinstance(agent_info.tool_declarations[0], genai_types.Tool) - assert agent_info.tool_declarations[0].function_declarations == [ + assert len(agent_info.agents["mock_agent"].tools) == 1 + assert isinstance(agent_info.agents["mock_agent"].tools[0], genai_types.Tool) + assert agent_info.agents["mock_agent"].tools[0].function_declarations == [ mock_function_declaration ] mock_from_callable.assert_called_once_with(callable=my_search_tool) @@ -3621,7 +3631,9 @@ def test_no_conflict_with_inference_configs(self): dataset = vertexai_genai_types.EvaluationDataset( eval_dataset_df=pd.DataFrame([{"agent_data": {"turns": []}}]) ) - inference_configs = {"cand1": {"agent_configs": {"agent1": {"name": "agent1"}}}} + inference_configs = { + "cand1": {"agent_configs": {"agent1": {"agent_id": "agent1"}}} + } _evals_utils._validate_dataset_agent_data(dataset, inference_configs) def test_no_conflict_if_inference_configs_has_no_agent_configs(self): @@ -3674,9 +3686,14 @@ def test_eval_case_with_agent_eval_fields(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[tool], + ) + }, ) intermediate_events = [ vertexai_genai_types.evals.Event( @@ -4546,9 +4563,14 @@ def test_eval_case_to_agent_data(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[tool], + ) + }, ) intermediate_events = [ vertexai_genai_types.evals.Event( @@ -4556,6 +4578,7 @@ def test_eval_case_to_agent_data(self): content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] ), + author="agent1", ) ] eval_case = vertexai_genai_types.EvalCase( @@ -4571,13 +4594,19 @@ def test_eval_case_to_agent_data(self): agent_data = ( _evals_metric_handlers.PredefinedMetricHandler._eval_case_to_agent_data( - eval_case + eval_case, + eval_case.prompt, + eval_case.responses[0].response, ) ) - assert agent_data.agent_config.developer_instruction.text == "instruction1" - assert agent_data.agent_config.legacy_tools.tool == [tool] - assert agent_data.events.event[0].parts[0].text == "intermediate event" + assert "agent1" in agent_data.agents + assert agent_data.agents["agent1"].instruction == "instruction1" + assert agent_data.agents["agent1"].tools == [tool] + assert len(agent_data.turns[0].events) == 3 + assert ( + agent_data.turns[0].events[1].content.parts[0].text == "intermediate event" + ) def test_eval_case_to_agent_data_events_only(self): intermediate_events = [ @@ -4605,8 +4634,10 @@ def test_eval_case_to_agent_data_events_only(self): ) ) - assert agent_data.agent_config is None - assert agent_data.events.event[0].parts[0].text == "intermediate event" + assert agent_data.agents is None + assert ( + agent_data.turns[0].events[0].content.parts[0].text == "intermediate event" + ) def test_eval_case_to_agent_data_empty_event_content(self): intermediate_events = [ @@ -4632,14 +4663,19 @@ def test_eval_case_to_agent_data_empty_event_content(self): ) ) - assert agent_data.agent_config is None - assert not agent_data.events.event + assert agent_data.agents is None + assert agent_data.turns[0].events[0].content is None def test_eval_case_to_agent_data_empty_intermediate_events_list(self): agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[], + ) + }, ) eval_case = vertexai_genai_types.EvalCase( @@ -4658,13 +4694,18 @@ def test_eval_case_to_agent_data_empty_intermediate_events_list(self): ) ) - assert not agent_data.events.event + assert agent_data.turns is None def test_eval_case_to_agent_data_agent_info_empty_tools(self): agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[], + ) + }, ) eval_case = vertexai_genai_types.EvalCase( prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), @@ -4683,8 +4724,8 @@ def test_eval_case_to_agent_data_agent_info_empty_tools(self): ) ) - assert agent_data.agent_config.developer_instruction.text == "instruction1" - assert not agent_data.agent_config.legacy_tools.tool + assert agent_data.agents["agent1"].instruction == "instruction1" + assert not agent_data.agents["agent1"].tools def test_eval_case_to_agent_data_agent_info_empty(self): intermediate_events = [ @@ -4712,7 +4753,7 @@ def test_eval_case_to_agent_data_agent_info_empty(self): ) ) - assert agent_data.agent_config is None + assert agent_data.agents is None @mock.patch.object(_evals_metric_handlers.logger, "warning") def test_tool_use_quality_metric_no_tool_call_logs_warning( @@ -5241,10 +5282,15 @@ def test_execute_evaluation_with_agent_info( ] } agent_info = { - "name": "agent1", - "instruction": "instruction1", - "description": "description1", - "tool_declarations": [tool], + "name": "agent_system", + "agents": { + "agent1": { + "agent_id": "agent1", + "instruction": "instruction1", + "description": "description1", + "tools": [tool], + } + }, } result = _evals_common._execute_evaluation( @@ -5256,9 +5302,10 @@ def test_execute_evaluation_with_agent_info( assert isinstance(result, vertexai_genai_types.EvaluationResult) assert len(result.eval_case_results) == 1 - assert result.agent_info.name == "agent1" - assert result.agent_info.instruction == "instruction1" - assert result.agent_info.tool_declarations == [ + assert result.agent_info.name == "agent_system" + assert "agent1" in result.agent_info.agents + assert result.agent_info.agents["agent1"].instruction == "instruction1" + assert result.agent_info.agents["agent1"].tools == [ genai_types.Tool( function_declarations=[ genai_types.FunctionDeclaration( @@ -6166,9 +6213,8 @@ def test_generate_user_scenarios(self): evals_module = evals.Evals(api_client_=self.mock_api_client) eval_dataset = evals_module.generate_user_scenarios( - agents={"agent_1": {}}, + agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"}, user_scenario_generation_config={"user_scenario_count": 2}, - root_agent_id="agent_1", ) assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 @@ -6196,9 +6242,8 @@ async def test_async_generate_user_scenarios(self): async_evals_module = evals.AsyncEvals(api_client_=self.mock_api_client) eval_dataset = await async_evals_module.generate_user_scenarios( - agents={"agent_1": {}}, + agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"}, user_scenario_generation_config={"user_scenario_count": 2}, - root_agent_id="agent_1", ) assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index ed2ef5b356..c3a488eb06 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -346,14 +346,7 @@ def _resolve_inference_configs( if agent_info_pydantic and agent_info_pydantic.name: inference_configs = {} inference_configs[agent_info_pydantic.name] = ( - types.EvaluationRunInferenceConfig( - agent_config=types.EvaluationRunAgentConfig( - developer_instruction=genai_types.Content( - parts=[genai_types.Part(text=agent_info_pydantic.instruction)] - ), - tools=agent_info_pydantic.tool_declarations, - ) - ) + types.EvaluationRunInferenceConfig(agent_configs=agent_info_pydantic.agents) ) # Resolve prompt template data if inference_configs: @@ -1771,7 +1764,7 @@ def _run_agent_internal( processed_agent_data = [] agent_data_agents = None if agent: - agent_data_agents = types.evals.AgentData._get_agents_map(agent) + agent_data_agents = types.evals.AgentData.get_agents_map(agent) is_user_simulation = _is_multi_turn_agent_simulation( user_simulator_config, prompt_dataset @@ -2233,12 +2226,17 @@ def _get_agent_info_from_inference_configs( else None ) instruction = di.parts[0].text if di and di.parts and di.parts[0].text else None + tools = agent_config.tools if agent_config and agent_config.tools else None + return types.evals.AgentInfo( name=candidate_names[0], - instruction=instruction, - tool_declarations=( - agent_config.tools if agent_config and agent_config.tools else None - ), + agents={ + "agent_0": types.evals.AgentConfig( + instruction=instruction, + tools=tools, + ) + }, + root_agent_id="agent_0", ) diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py index 89b3cb9852..33f18b1519 100644 --- a/vertexai/_genai/_evals_data_converters.py +++ b/vertexai/_genai/_evals_data_converters.py @@ -810,6 +810,7 @@ def merge_evaluation_datasets( "conversation_history", "intermediate_events", "agent_data", + "agent_info", }, exclude_none=True, ) @@ -834,6 +835,7 @@ def merge_evaluation_datasets( "conversation_history", "intermediate_events", "agent_data", + "agent_info", }, exclude_none=True, ) @@ -865,7 +867,7 @@ def merge_evaluation_datasets( reference=base_eval_case.reference, system_instruction=base_eval_case.system_instruction, conversation_history=base_eval_case.conversation_history, - agent_info=agent_info, + agent_info=agent_info or base_eval_case.agent_info, agent_data=base_eval_case.agent_data, intermediate_events=base_eval_case.intermediate_events, **eval_case_custom_columns, diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py index 19b8aa4fd4..f84d1b3c78 100644 --- a/vertexai/_genai/_evals_metric_handlers.py +++ b/vertexai/_genai/_evals_metric_handlers.py @@ -923,46 +923,69 @@ def _content_to_instance_data( @staticmethod def _eval_case_to_agent_data( eval_case: types.EvalCase, + prompt_content: Optional[genai_types.Content] = None, + response_content: Optional[genai_types.Content] = None, ) -> Optional[types.evals.AgentData]: - """Converts an EvalCase object to an AgentData object.""" + """Converts an EvalCase object to a single turn AgentData object. + + If `eval_case.agent_data` is provided, it is returned directly, and + `prompt_content` and `response_content` are ignored. + """ if getattr(eval_case, "agent_data", None): return eval_case.agent_data - if not eval_case.agent_info and not eval_case.intermediate_events: + if ( + not eval_case.agent_info + and not eval_case.intermediate_events + and not prompt_content + and not response_content + ): return None - tools = None - developer_instruction = None - agent_config = None - tool_declarations = [] - event_contents = [] + agents_map = None if eval_case.agent_info: - agent_info = eval_case.agent_info - if agent_info.instruction: - developer_instruction = types.evals.InstanceData( - text=agent_info.instruction - ) - if agent_info.tool_declarations: - tool_declarations = agent_info.tool_declarations - tools = types.evals.Tools(tool=tool_declarations) - - if tools or developer_instruction: - agent_config = types.evals.AgentConfig( - legacy_tools=tools, - developer_instruction=developer_instruction, + agents_map = eval_case.agent_info.agents + + events = [] + if prompt_content: + events.append( + types.evals.AgentEvent( + author="user", + content=prompt_content, ) + ) if eval_case.intermediate_events: - event_contents = [ - event.content - for event in eval_case.intermediate_events - if event.content + for event in eval_case.intermediate_events: + events.append( + types.evals.AgentEvent( + author=event.author, + content=event.content, + event_time=event.creation_timestamp, + ) + ) + + if response_content: + events.append( + types.evals.AgentEvent( + author="model", + content=response_content, + ) + ) + + turns = None + if events: + turns = [ + types.evals.ConversationTurn( + turn_index=0, + turn_id="turn_0", + events=events, + ) ] - events = types.evals.Events(event=event_contents) return types.evals.AgentData( - agent_config=agent_config, - events=events, + agents=agents_map, + turns=turns, ) def _build_request_payload( @@ -1036,7 +1059,9 @@ def _build_request_payload( if other_data_map else None ), - agent_data=PredefinedMetricHandler._eval_case_to_agent_data(eval_case), + agent_data=PredefinedMetricHandler._eval_case_to_agent_data( + eval_case, extracted_prompt, response_content + ), ) request_payload: dict[str, Any] = { diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 05b37bd369..0881bd571e 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -2129,7 +2129,7 @@ def create_evaluation_run( raise ValueError( "At most one of agent_info or inference_configs can be provided." ) - agent_info_pydantic = ( + parsed_agent_info = ( evals_types.AgentInfo.model_validate(agent_info) if isinstance(agent_info, dict) else (agent_info or evals_types.AgentInfo()) @@ -2137,7 +2137,7 @@ def create_evaluation_run( if isinstance(dataset, types.EvaluationDataset): _evals_utils._validate_dataset_agent_data(dataset, inference_configs) resolved_dataset = _evals_common._resolve_dataset( - self._api_client, dataset, dest, agent_info_pydantic + self._api_client, dataset, dest, parsed_agent_info ) output_config = genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest) @@ -2149,10 +2149,10 @@ def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - self._api_client, resolved_dataset, inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) resolved_labels = _evals_common._add_evaluation_run_labels( - labels, agent_info_pydantic + labels, parsed_agent_info ) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" return self._create_evaluation_run( @@ -2306,26 +2306,29 @@ def create_evaluation_set( def generate_user_scenarios( self, *, - agents: dict[str, evals_types.AgentConfigOrDict], + agent_info: evals_types.AgentInfoOrDict, user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict, - root_agent_id: str, ) -> types.EvaluationDataset: """Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test. Args: - agents: A map of agent ID to AgentConfig. + agent_info: The agent info to generate user scenarios for. user_scenario_generation_config: Configuration for generating user scenarios. - root_agent_id: The ID of the root agent. Returns: An EvaluationDataset containing the generated user scenarios. """ + parsed_agent_info = ( + evals_types.AgentInfo.model_validate(agent_info) + if isinstance(agent_info, dict) + else agent_info + ) response = self._generate_user_scenarios( - agents=agents, + agents=parsed_agent_info.agents, + root_agent_id=parsed_agent_info.root_agent_id, user_scenario_generation_config=user_scenario_generation_config, - root_agent_id=root_agent_id, ) return _evals_utils._postprocess_user_scenarios_response(response) @@ -3304,7 +3307,7 @@ async def create_evaluation_run( raise ValueError( "At most one of agent_info or inference_configs can be provided." ) - agent_info_pydantic = ( + parsed_agent_info = ( evals_types.AgentInfo.model_validate(agent_info) if isinstance(agent_info, dict) else (agent_info or evals_types.AgentInfo()) @@ -3312,7 +3315,7 @@ async def create_evaluation_run( if isinstance(dataset, types.EvaluationDataset): _evals_utils._validate_dataset_agent_data(dataset, inference_configs) resolved_dataset = _evals_common._resolve_dataset( - self._api_client, dataset, dest, agent_info_pydantic + self._api_client, dataset, dest, parsed_agent_info ) output_config = genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest) @@ -3324,10 +3327,10 @@ async def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - self._api_client, resolved_dataset, inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) resolved_labels = _evals_common._add_evaluation_run_labels( - labels, agent_info_pydantic + labels, parsed_agent_info ) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" @@ -3488,26 +3491,29 @@ async def create_evaluation_set( async def generate_user_scenarios( self, *, - agents: dict[str, evals_types.AgentConfigOrDict], + agent_info: evals_types.AgentInfoOrDict, user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict, - root_agent_id: str, ) -> types.EvaluationDataset: """Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test. Args: - agents: A map of agent ID to AgentConfig. + agent_info: The agent info to generate user scenarios for. user_scenario_generation_config: Configuration for generating user scenarios. - root_agent_id: The ID of the root agent. Returns: An EvaluationDataset containing the generated user scenarios. """ + parsed_agent_info = ( + evals_types.AgentInfo.model_validate(agent_info) + if isinstance(agent_info, dict) + else agent_info + ) response = await self._generate_user_scenarios( - agents=agents, + agents=parsed_agent_info.agents, + root_agent_id=parsed_agent_info.root_agent_id, user_scenario_generation_config=user_scenario_generation_config, - root_agent_id=root_agent_id, ) return _evals_utils._postprocess_user_scenarios_response(response) diff --git a/vertexai/_genai/types/evals.py b/vertexai/_genai/types/evals.py index b95b4e320d..ec6a9cd092 100644 --- a/vertexai/_genai/types/evals.py +++ b/vertexai/_genai/types/evals.py @@ -36,83 +36,6 @@ class Importance(_common.CaseInSensitiveEnum): """Low importance.""" -class Tools(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Represents a list of tools for an agent. - """ - - tool: Optional[list[genai_types.Tool]] = Field( - default=None, - description="""List of tools: each tool can have multiple function declarations.""", - ) - - -class ToolsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Represents a list of tools for an agent. - """ - - tool: Optional[list[genai_types.ToolDict]] - """List of tools: each tool can have multiple function declarations.""" - - -ToolsOrDict = Union[Tools, ToolsDict] - - -class InstanceDataContents(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - List of standard Content messages from Gemini API. - """ - - contents: Optional[list[genai_types.Content]] = Field( - default=None, description="""Repeated contents.""" - ) - - -class InstanceDataContentsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - List of standard Content messages from Gemini API. - """ - - contents: Optional[list[genai_types.ContentDict]] - """Repeated contents.""" - - -InstanceDataContentsOrDict = Union[InstanceDataContents, InstanceDataContentsDict] - - -class InstanceData(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Instance data used to populate placeholders in a metric prompt template. - """ - - text: Optional[str] = Field(default=None, description="""Text data.""") - contents: Optional[InstanceDataContents] = Field( - default=None, description="""List of Gemini content data.""" - ) - - -class InstanceDataDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Instance data used to populate placeholders in a metric prompt template. - """ - - text: Optional[str] - """Text data.""" - - contents: Optional[InstanceDataContentsDict] - """List of Gemini content data.""" - - -InstanceDataOrDict = Union[InstanceData, InstanceDataDict] - - class AgentConfig(_common.BaseModel): """Represents configuration for an Agent.""" @@ -122,10 +45,6 @@ class AgentConfig(_common.BaseModel): This ID is used to refer to this agent, e.g., in AgentEvent.author, or in the `sub_agents` field. It must be unique within the `agents` map.""", ) - agent_resource_name: Optional[str] = Field( - default=None, - description="""The Agent Engine resource name, formatted as `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""", - ) agent_type: Optional[str] = Field( default=None, description="""The type or class of the agent (e.g., "LlmAgent", "RouterAgent", @@ -152,24 +71,13 @@ class AgentConfig(_common.BaseModel): description="""The list of valid agent IDs that this agent can delegate to. This defines the directed edges in the multi-agent system graph topology.""", ) - tools_text: Optional[str] = Field( - default=None, - description="""A JSON string containing a list of tools available to an agent.""", - ) - legacy_tools: Optional[Tools] = Field( - default=None, description="""List of tools.""" - ) - developer_instruction: Optional[InstanceData] = Field( - default=None, - description="""A field containing instructions from the developer for the agent.""", - ) @staticmethod def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: """Gets tool declarations from an agent. Args: - agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. + agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type. Returns: The tool declarations of the agent. @@ -188,25 +96,29 @@ def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: return tool_declarations @classmethod - def from_agent( - cls, agent: Any, agent_resource_name: Optional[str] = None - ) -> "AgentConfig": - """Creates an AgentConfig from an ADK agent object. + def from_agent(cls, agent: Any) -> "AgentConfig": + """Creates an AgentConfig from an ADK agent. Args: - agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - agent_resource_name: Optional. The agent engine resource name. + agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type. Returns: - An AgentConfig object populated with the agent's metadata. + An AgentConfig populated with the agent's metadata for evaluation. """ + agent_id = getattr(agent, "name", None) + if not agent_id: + raise ValueError(f"Agent {agent} must have a name.") return cls( # pytype: disable=missing-parameter - agent_id=getattr(agent, "name", "agent_0") or "agent_0", - agent_resource_name=agent_resource_name, + agent_id=agent_id, agent_type=agent.__class__.__name__, description=getattr(agent, "description", None), instruction=getattr(agent, "instruction", None), tools=AgentConfig._get_tool_declarations_from_agent(agent), + sub_agents=[ + str(getattr(sub_agent, "name")) + for sub_agent in getattr(agent, "sub_agents", []) + if getattr(sub_agent, "name", None) is not None + ], ) @@ -218,9 +130,6 @@ class AgentConfigDict(TypedDict, total=False): This ID is used to refer to this agent, e.g., in AgentEvent.author, or in the `sub_agents` field. It must be unique within the `agents` map.""" - agent_resource_name: Optional[str] - """The Agent Engine resource name, formatted as `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""" - agent_type: Optional[str] """The type or class of the agent (e.g., "LlmAgent", "RouterAgent", "ToolUseAgent"). Useful for the autorater to understand the expected @@ -243,15 +152,6 @@ class AgentConfigDict(TypedDict, total=False): """The list of valid agent IDs that this agent can delegate to. This defines the directed edges in the multi-agent system graph topology.""" - tools_text: Optional[str] - """A JSON string containing a list of tools available to an agent.""" - - legacy_tools: Optional[ToolsDict] - """List of tools.""" - - developer_instruction: Optional[InstanceDataDict] - """A field containing instructions from the developer for the agent.""" - AgentConfigOrDict = Union[AgentConfig, AgentConfigDict] @@ -339,30 +239,6 @@ class ConversationTurnDict(TypedDict, total=False): ConversationTurnOrDict = Union[ConversationTurn, ConversationTurnDict] -class Events(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Represents a list of events for an agent. - """ - - event: Optional[list[genai_types.Content]] = Field( - default=None, description="""A list of events.""" - ) - - -class EventsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Represents a list of events for an agent. - """ - - event: Optional[list[genai_types.ContentDict]] - """A list of events.""" - - -EventsOrDict = Union[Events, EventsDict] - - class AgentData(_common.BaseModel): """Represents data specific to multi-turn agent evaluations.""" @@ -378,30 +254,25 @@ class AgentData(_common.BaseModel): Each turn represents a logical execution cycle (e.g., User Input -> Agent Response).""", ) - agent_config: Optional[AgentConfig] = Field( - default=None, description="""Agent configuration.""" - ) - events_text: Optional[str] = Field( - default=None, description="""A JSON string containing a sequence of events.""" - ) - events: Optional[Events] = Field(default=None, description="""A list of events.""") @classmethod - def _get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]: + def get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]: """Recursively gets all agent configs from an agent and its sub-agents. Args: - agent: The agent to get the agent info from. + agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type. Returns: A dict mapping agent_id to AgentConfig. """ agent_config = AgentConfig.from_agent(agent) - agent_id = agent_config.agent_id or "agent_0" + agent_id = agent_config.agent_id + if not agent_id: + raise ValueError(f"Agent {agent} must have a name.") agents_map = {agent_id: agent_config} for sub_agent in getattr(agent, "sub_agents", []): - agents_map.update(cls._get_agents_map(sub_agent)) + agents_map.update(cls.get_agents_map(sub_agent)) return agents_map @@ -419,8 +290,8 @@ def from_session(cls, agent: Any, session_history: list[Any]) -> "AgentData": Returns: An AgentData object containing the segmented history and agent config. """ - agents_map = cls._get_agents_map(agent) - agent_id = getattr(agent, "name", "agent_0") or "agent_0" + agents_map = cls.get_agents_map(agent) + agent_id = agent.name turns: list[ConversationTurn] = [] current_turn_events: list[AgentEvent] = [] @@ -510,21 +381,12 @@ class AgentDataDict(TypedDict, total=False): Each turn represents a logical execution cycle (e.g., User Input -> Agent Response).""" - agent_config: Optional[AgentConfigDict] - """Agent configuration.""" - - events_text: Optional[str] - """A JSON string containing a sequence of events.""" - - events: Optional[EventsDict] - """A list of events.""" - AgentDataOrDict = Union[AgentData, AgentDataDict] class AgentInfo(_common.BaseModel): - """The agent info of an agent, used for agent eval.""" + """The agent info of an agent system, used for agent evaluation.""" agent_resource_name: Optional[str] = Field( default=None, @@ -532,93 +394,69 @@ class AgentInfo(_common.BaseModel): `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""", ) name: Optional[str] = Field( - default=None, description="""Agent name, used as an identifier.""" - ) - instruction: Optional[str] = Field( - default=None, description="""Agent developer instruction.""" + default=None, description="""Agent candidate name, used as an identifier.""" ) - description: Optional[str] = Field( - default=None, description="""Agent description.""" + agents: Optional[dict[str, AgentConfig]] = Field( + default=None, + description="""A map containing the static configurations for each agent in the system. + Key: agent_id (matches the `author` field in events). + Value: The static configuration of the agent.""", ) - tool_declarations: Optional[genai_types.ToolListUnion] = Field( - default=None, description="""List of tools used by the Agent.""" + root_agent_id: Optional[str] = Field( + default=None, description="""The agent ID of the root agent.""" ) - @staticmethod - def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: - """Gets tool declarations from an agent. - - Args: - agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - - Returns: - The tool declarations of the agent. - """ - tool_declarations: genai_types.ToolListUnion = [] - for tool in agent.tools: - tool_declarations.append( - { - "function_declarations": [ - genai_types.FunctionDeclaration.from_callable_with_api_option( - callable=tool - ) - ] - } - ) - return tool_declarations - @classmethod def load_from_agent( cls, agent: Any, agent_resource_name: Optional[str] = None ) -> "AgentInfo": - """Loads agent info from an agent. + """Loads agent info from an ADK agent. Args: - agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - agent_resource_name: Optional. The agent engine resource name. + agent: The root agent to get the agent info from, data type is google.adk.agents.LLMAgent type. + agent_resource_name: Optional. The agent engine resource name for the deployed agent. Returns: - The agent info of the agent. + The agent info of the agent system. Example: ``` from vertexai._genai import types - # Assuming 'my_agent' is an instance of google.adk.agents.LLMAgent - agent_info = types.evals.AgentInfo.load_from_agent( agent=my_agent, agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456" ) ``` """ + agent_name = getattr(agent, "name", None) + if not agent_name: + raise ValueError(f"Agent {agent} must have a name.") return cls( # pytype: disable=missing-parameter - name=agent.name, + name=agent_name, agent_resource_name=agent_resource_name, - instruction=agent.instruction, - description=agent.description, - tool_declarations=AgentInfo._get_tool_declarations_from_agent(agent), + agents=AgentData.get_agents_map(agent), + root_agent_id=agent_name, ) class AgentInfoDict(TypedDict, total=False): - """The agent info of an agent, used for agent eval.""" + """The agent info of an agent system, used for agent evaluation.""" agent_resource_name: Optional[str] """The agent engine used to run agent. Agent engine resource name in str type, with format `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""" name: Optional[str] - """Agent name, used as an identifier.""" - - instruction: Optional[str] - """Agent developer instruction.""" + """Agent candidate name, used as an identifier.""" - description: Optional[str] - """Agent description.""" + agents: Optional[dict[str, AgentConfigDict]] + """A map containing the static configurations for each agent in the system. + Key: agent_id (matches the `author` field in events). + Value: The static configuration of the agent.""" - tool_declarations: Optional[genai_types.ToolListUnionDict] - """List of tools used by the Agent.""" + root_agent_id: Optional[str] + """The agent ID of the root agent.""" AgentInfoOrDict = Union[AgentInfo, AgentInfoDict] @@ -854,6 +692,107 @@ class MessageDict(TypedDict, total=False): MessageOrDict = Union[Message, MessageDict] +class Events(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Represents a list of events for an agent. + """ + + event: Optional[list[genai_types.Content]] = Field( + default=None, description="""A list of events.""" + ) + + +class EventsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Represents a list of events for an agent. + """ + + event: Optional[list[genai_types.ContentDict]] + """A list of events.""" + + +EventsOrDict = Union[Events, EventsDict] + + +class InstanceDataContents(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + List of standard Content messages from Gemini API. + """ + + contents: Optional[list[genai_types.Content]] = Field( + default=None, description="""Repeated contents.""" + ) + + +class InstanceDataContentsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + List of standard Content messages from Gemini API. + """ + + contents: Optional[list[genai_types.ContentDict]] + """Repeated contents.""" + + +InstanceDataContentsOrDict = Union[InstanceDataContents, InstanceDataContentsDict] + + +class InstanceData(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Instance data used to populate placeholders in a metric prompt template. + """ + + text: Optional[str] = Field(default=None, description="""Text data.""") + contents: Optional[InstanceDataContents] = Field( + default=None, description="""List of Gemini content data.""" + ) + + +class InstanceDataDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Instance data used to populate placeholders in a metric prompt template. + """ + + text: Optional[str] + """Text data.""" + + contents: Optional[InstanceDataContentsDict] + """List of Gemini content data.""" + + +InstanceDataOrDict = Union[InstanceData, InstanceDataDict] + + +class Tools(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Represents a list of tools for an agent. + """ + + tool: Optional[list[genai_types.Tool]] = Field( + default=None, + description="""List of tools: each tool can have multiple function declarations.""", + ) + + +class ToolsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Represents a list of tools for an agent. + """ + + tool: Optional[list[genai_types.ToolDict]] + """List of tools: each tool can have multiple function declarations.""" + + +ToolsOrDict = Union[Tools, ToolsDict] + + class RubricContentProperty(_common.BaseModel): """Defines criteria based on a specific property."""