Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 114 additions & 14 deletions tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from tests.unit.vertexai.genai.replays import pytest_helper
from vertexai import types
from google.genai import types as genai_types
import pytest
import pandas as pd
import pytest

GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
GENERAL_QUALITY_METRIC = types.EvaluationRunMetric(
Expand All @@ -42,7 +42,7 @@
metric_config=types.UnifiedMetric(
llm_based_metric_spec=types.LLMBasedMetricSpec(
metric_prompt_template=(
"\nEvaluate the fluency of the response. Provide a score from 1-5."
"\nEvaluate the fluency of the response. Provide a score from" " 1-5."
)
)
),
Expand All @@ -65,7 +65,7 @@
),
)
INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
model="projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash"
)
TOOL = genai_types.Tool(
function_declarations=[
Expand All @@ -80,10 +80,16 @@
]
)
AGENT_INFO = types.evals.AgentInfo(
agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456",
agent_resource_name=("projects/123/locations/us-central1/reasoningEngines/456"),
name="agent-1",
instruction="agent-1 instruction",
tool_declarations=[TOOL],
agents={
"agent-1": types.evals.AgentConfig(
agent_id="agent-1",
instruction="agent-1 instruction",
tools=[TOOL],
)
},
root_agent_id="agent-1",
)
DEFAULT_PROMPT_TEMPLATE = "{prompt}"
INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame(
Expand All @@ -96,9 +102,9 @@
}
)
CANDIDATE_NAME = "candidate_1"
MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
MODEL_NAME = "projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash"
EVAL_SET_NAME = (
"projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800"
"projects/977012026409/locations/us-central1/evaluationSets/6619939608513740800"
)


Expand Down Expand Up @@ -140,12 +146,11 @@ def test_create_eval_run_data_source_evaluation_set(client):
assert evaluation_run.inference_configs[
AGENT_INFO.name
] == types.EvaluationRunInferenceConfig(
agent_config=types.EvaluationRunAgentConfig(
developer_instruction=genai_types.Content(
parts=[genai_types.Part(text="agent-1 instruction")]
),
tools=[TOOL],
)
agent_configs=AGENT_INFO.agents,
agent_run_config=types.AgentRunConfig(
agent_engine=AGENT_INFO.agent_resource_name,
user_simulator_config={"max_turn": 5},
),
)
assert evaluation_run.labels == {
"vertex-ai-evaluation-agent-engine-id": "456",
Expand Down Expand Up @@ -202,6 +207,53 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
assert evaluation_run.error is None


def test_create_eval_run_with_user_simulator_config(client):
    """Verifies create_evaluation_run() wires user_simulator_config into the agent run config.

    Creates a run against a fixed evaluation set with AGENT_INFO and a
    UserSimulatorConfig, then checks the returned EvaluationRun's data
    source, evaluation config, per-agent inference config, labels, and
    error field.
    """
    # This API surface is only available on the v1beta1 version.
    client._api_client._http_options.api_version = "v1beta1"
    eval_set = "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040"
    run = client.evals.create_evaluation_run(
        name="test_user_simulator_config",
        display_name="test_user_simulator_config",
        dataset=types.EvaluationRunDataSource(evaluation_set=eval_set),
        dest=GCS_DEST,
        metrics=[GENERAL_QUALITY_METRIC],
        agent_info=AGENT_INFO,
        user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
        labels={"label1": "value1"},
    )

    assert isinstance(run, types.EvaluationRun)
    assert run.display_name == "test_user_simulator_config"
    assert run.state == types.EvaluationRunState.PENDING
    assert isinstance(run.data_source, types.EvaluationRunDataSource)
    assert run.data_source.evaluation_set == eval_set

    expected_eval_config = types.EvaluationRunConfig(
        output_config=genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
        ),
        metrics=[GENERAL_QUALITY_METRIC],
    )
    assert run.evaluation_config == expected_eval_config

    # The user simulator config must be echoed back inside the agent run
    # config keyed by the agent's name.
    expected_inference_config = types.EvaluationRunInferenceConfig(
        agent_configs=AGENT_INFO.agents,
        agent_run_config=types.AgentRunConfig(
            agent_engine=AGENT_INFO.agent_resource_name,
            user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
        ),
    )
    assert run.inference_configs[AGENT_INFO.name] == expected_inference_config

    # The service adds an agent-engine label alongside the user-supplied one.
    assert run.labels == {
        "vertex-ai-evaluation-agent-engine-id": "456",
        "label1": "value1",
    }
    assert run.error is None


def test_create_eval_run_with_inference_configs(client):
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
client._api_client._http_options.api_version = "v1beta1"
Expand Down Expand Up @@ -668,6 +720,54 @@ async def test_create_eval_run_async(client):
assert evaluation_run.error is None


@pytest.mark.asyncio
async def test_create_eval_run_async_with_user_simulator_config(client):
    """Async variant: create_evaluation_run() with user_simulator_config.

    Mirrors the sync test but goes through client.aio.evals and awaits
    the result, asserting the same EvaluationRun structure.
    """
    # This API surface is only available on the v1beta1 version.
    client._api_client._http_options.api_version = "v1beta1"
    eval_set = "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040"
    run = await client.aio.evals.create_evaluation_run(
        name="test_user_simulator_config_async",
        display_name="test_user_simulator_config_async",
        dataset=types.EvaluationRunDataSource(evaluation_set=eval_set),
        dest=GCS_DEST,
        metrics=[GENERAL_QUALITY_METRIC],
        agent_info=AGENT_INFO,
        user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
        labels={"label1": "value1"},
    )

    assert isinstance(run, types.EvaluationRun)
    assert run.display_name == "test_user_simulator_config_async"
    assert run.state == types.EvaluationRunState.PENDING
    assert isinstance(run.data_source, types.EvaluationRunDataSource)
    assert run.data_source.evaluation_set == eval_set

    expected_eval_config = types.EvaluationRunConfig(
        output_config=genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
        ),
        metrics=[GENERAL_QUALITY_METRIC],
    )
    assert run.evaluation_config == expected_eval_config

    # The user simulator config must be echoed back inside the agent run
    # config keyed by the agent's name.
    expected_inference_config = types.EvaluationRunInferenceConfig(
        agent_configs=AGENT_INFO.agents,
        agent_run_config=types.AgentRunConfig(
            agent_engine=AGENT_INFO.agent_resource_name,
            user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
        ),
    )
    assert run.inference_configs[AGENT_INFO.name] == expected_inference_config

    # The service adds an agent-engine label alongside the user-supplied one.
    assert run.labels == {
        "label1": "value1",
        "vertex-ai-evaluation-agent-engine-id": "456",
    }
    assert run.error is None


@pytest.mark.asyncio
async def test_create_eval_run_async_with_inference_configs(client):
"""Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
Expand Down
100 changes: 46 additions & 54 deletions tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,27 @@
def test_gen_user_scenarios(client):
"""Tests that generate_user_scenarios() correctly calls the API and parses the response."""
eval_dataset = client.evals.generate_user_scenarios(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
agent_info=types.evals.AgentInfo(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
root_agent_id="booking-agent",
),
user_scenario_generation_config=types.evals.UserScenarioGenerationConfig(
user_scenario_count=2,
simulation_instruction=(
Expand All @@ -49,18 +52,11 @@ def test_gen_user_scenarios(client):
environment_data="Today is Monday. Flights to Paris are available.",
model_name="gemini-2.5-flash",
),
root_agent_id="booking-agent",
)
assert isinstance(eval_dataset, types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
assert (
eval_dataset.eval_cases[0].user_scenario.starting_prompt
== "I want to find a flight from New York to London."
)
assert (
eval_dataset.eval_cases[0].user_scenario.conversation_plan
== "Actually, I meant Paris, not London. Please search for flights to Paris."
)
assert eval_dataset.eval_cases[0].user_scenario.starting_prompt
assert eval_dataset.eval_cases[0].user_scenario.conversation_plan


pytest_plugins = ("pytest_asyncio",)
Expand All @@ -70,24 +66,27 @@ def test_gen_user_scenarios(client):
async def test_gen_user_scenarios_async(client):
"""Tests that generate_user_scenarios() async correctly calls the API and parses the response."""
eval_dataset = await client.aio.evals.generate_user_scenarios(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
agent_info=types.evals.AgentInfo(
agents={
"booking-agent": types.evals.AgentConfig(
agent_id="booking-agent",
agent_type="service_agent",
description="An agent capable of booking flights and hotels.",
instruction="You are a helpful travel assistant. Use tools to find flights.",
tools=[
{
"function_declarations": [
{
"name": "search_flights",
"description": "Search for available flights.",
}
]
}
],
)
},
root_agent_id="booking-agent",
),
user_scenario_generation_config=types.evals.UserScenarioGenerationConfig(
user_scenario_count=2,
simulation_instruction=(
Expand All @@ -97,18 +96,11 @@ async def test_gen_user_scenarios_async(client):
environment_data="Today is Monday. Flights to Paris are available.",
model_name="gemini-2.5-flash",
),
root_agent_id="booking-agent",
)
assert isinstance(eval_dataset, types.EvaluationDataset)
assert len(eval_dataset.eval_cases) == 2
assert (
eval_dataset.eval_cases[1].user_scenario.starting_prompt
== "Find me a flight from Boston to Rome for next month."
)
assert (
eval_dataset.eval_cases[1].user_scenario.conversation_plan
== "Wait, change of plans. I need to go to Milan instead, and it needs to be a round trip, returning two weeks after departure."
)
assert eval_dataset.eval_cases[1].user_scenario.starting_prompt
assert eval_dataset.eval_cases[1].user_scenario.conversation_plan


pytestmark = pytest_helper.setup(
Expand Down
Loading
Loading