diff --git a/openevolve/_version.py b/openevolve/_version.py index b43fa8361f..e24e6c8194 100644 --- a/openevolve/_version.py +++ b/openevolve/_version.py @@ -1,3 +1,3 @@ """Version information for openevolve package.""" -__version__ = "0.2.26" +__version__ = "0.2.27" diff --git a/openevolve/api.py b/openevolve/api.py index b921c29180..65c3702064 100644 --- a/openevolve/api.py +++ b/openevolve/api.py @@ -141,6 +141,13 @@ async def _run_evolution_async( # Process evaluator evaluator_path = _prepare_evaluator(evaluator, temp_dir, temp_files) + # Auto-disable cascade evaluation if the evaluator doesn't define stage functions + if config_obj.evaluator.cascade_evaluation: + with open(evaluator_path, "r") as f: + eval_content = f.read() + if "evaluate_stage1" not in eval_content: + config_obj.evaluator.cascade_evaluation = False + # Create and run controller controller = OpenEvolve( initial_program_path=program_path, @@ -239,13 +246,40 @@ def _prepare_evaluator( # If it's a callable, create a wrapper module if callable(evaluator): - # Create a unique global name for this evaluator - evaluator_id = f"_openevolve_evaluator_{uuid.uuid4().hex[:8]}" + # Try to get the source code of the callable so it can be serialized + # into a standalone file that works in subprocesses + try: + func_source = inspect.getsource(evaluator) + # Dedent in case the function was defined inside another scope + import textwrap + + func_source = textwrap.dedent(func_source) + func_name = evaluator.__name__ + + # Build a self-contained evaluator module with the function source + # and an evaluate() entry point that calls it + evaluator_code = f""" +# Auto-generated evaluator from user-provided callable +import importlib.util +import sys +import os +import copy +import json +import time + +{func_source} - # Store in globals so the wrapper can find it - globals()[evaluator_id] = evaluator +def evaluate(program_path): + '''Wrapper that calls the user-provided evaluator function''' + return {func_name}(program_path) +""" + except (OSError, TypeError): + # If we can't get source (e.g. built-in, lambda, or closure), + # fall back to the globals-based approach + evaluator_id = f"_openevolve_evaluator_{uuid.uuid4().hex[:8]}" + globals()[evaluator_id] = evaluator - evaluator_code = f""" + evaluator_code = f""" # Wrapper for user-provided evaluator function import {__name__} as api_module @@ -335,57 +369,67 @@ def initial_sort(arr): lines.insert(func_end + 1, " " * (indent + 4) + "# EVOLVE-BLOCK-END") func_source = "\n".join(lines) - # Create evaluator that tests the function - def evaluator(program_path): - import importlib.util - import sys + # Create a self-contained evaluator as a code string so it works in subprocesses. + # Closure-based evaluators fail with process-based parallelism because subprocess + # workers cannot access the parent process's memory. + evaluator_code = f""" +import importlib.util +import copy - # Load the evolved program - spec = importlib.util.spec_from_file_location("evolved", program_path) - if spec is None or spec.loader is None: - return {"score": 0.0, "error": "Failed to load program"} +FUNC_NAME = {func_name!r} +TEST_CASES = {test_cases!r} - module = importlib.util.module_from_spec(spec) +def evaluate(program_path): + '''Auto-generated evaluator for evolve_function''' + # Load the evolved program + spec = importlib.util.spec_from_file_location("evolved", program_path) + if spec is None or spec.loader is None: + return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}} - try: - spec.loader.exec_module(module) - except Exception as e: - return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"} + module = importlib.util.module_from_spec(spec) - if not hasattr(module, func_name): - return {"score": 0.0, "error": f"Function '{func_name}' not found"} + try: + spec.loader.exec_module(module) + except Exception as e: + return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}} - evolved_func = getattr(module, func_name) - correct = 0 - total = len(test_cases) - errors = [] + if not hasattr(module, FUNC_NAME): + return {{"combined_score": 0.0, "score": 0.0, "error": f"Function '{{FUNC_NAME}}' not found"}} - for input_val, expected in test_cases: - try: - # Handle case where input is a list/mutable - make a copy - if isinstance(input_val, list): - test_input = input_val.copy() - else: - test_input = input_val - - result = evolved_func(test_input) - if result == expected: - correct += 1 - else: - errors.append(f"Input {input_val}: expected {expected}, got {result}") - except Exception as e: - errors.append(f"Input {input_val}: {str(e)}") - - return { - "score": correct / total, - "test_pass_rate": correct / total, - "tests_passed": correct, - "total_tests": total, - "errors": errors[:3], # Limit error details - } + evolved_func = getattr(module, FUNC_NAME) + correct = 0 + total = len(TEST_CASES) + errors = [] + + for input_val, expected in TEST_CASES: + try: + # Handle case where input is a list/mutable - make a copy + if isinstance(input_val, list): + test_input = input_val.copy() + else: + test_input = input_val + + result = evolved_func(test_input) + if result == expected: + correct += 1 + else: + errors.append(f"Input {{input_val}}: expected {{expected}}, got {{result}}") + except Exception as e: + errors.append(f"Input {{input_val}}: {{str(e)}}") + + score = correct / total if total > 0 else 0.0 + return {{ + "combined_score": score, + "score": score, + "test_pass_rate": score, + "tests_passed": correct, + "total_tests": total, + "errors": errors[:3], + }} +""" return run_evolution( - initial_program=func_source, evaluator=evaluator, iterations=iterations, **kwargs + initial_program=func_source, evaluator=evaluator_code, iterations=iterations, **kwargs ) @@ -447,36 +491,51 @@ def benchmark_sort(instance): lines.append(" " * (indent + 4) + "# EVOLVE-BLOCK-END") class_source = "\n".join(lines) - # Create evaluator - def evaluator(program_path): - import importlib.util + # Create a self-contained evaluator as a code string so it works in subprocesses. + import textwrap - # Load the evolved program - spec = importlib.util.spec_from_file_location("evolved", program_path) - if spec is None or spec.loader is None: - return {"score": 0.0, "error": "Failed to load program"} + class_name = algorithm_class.__name__ + benchmark_source = textwrap.dedent(inspect.getsource(benchmark)) - module = importlib.util.module_from_spec(spec) + evaluator_code = f""" +import importlib.util - try: - spec.loader.exec_module(module) - except Exception as e: - return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"} +CLASS_NAME = {class_name!r} - if not hasattr(module, algorithm_class.__name__): - return {"score": 0.0, "error": f"Class '{algorithm_class.__name__}' not found"} +{benchmark_source} - AlgorithmClass = getattr(module, algorithm_class.__name__) +def evaluate(program_path): + '''Auto-generated evaluator for evolve_algorithm''' + spec = importlib.util.spec_from_file_location("evolved", program_path) + if spec is None or spec.loader is None: + return {{"combined_score": 0.0, "score": 0.0, "error": "Failed to load program"}} - try: - instance = AlgorithmClass() - metrics = benchmark(instance) - return metrics if isinstance(metrics, dict) else {"score": metrics} - except Exception as e: - return {"score": 0.0, "error": str(e)} + module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(module) + except Exception as e: + return {{"combined_score": 0.0, "score": 0.0, "error": f"Failed to execute program: {{str(e)}}"}} + + if not hasattr(module, CLASS_NAME): + return {{"combined_score": 0.0, "score": 0.0, "error": f"Class '{{CLASS_NAME}}' not found"}} + + AlgorithmClass = getattr(module, CLASS_NAME) + + try: + instance = AlgorithmClass() + metrics = {benchmark.__name__}(instance) + if not isinstance(metrics, dict): + metrics = {{"score": metrics}} + if "combined_score" not in metrics: + metrics["combined_score"] = metrics.get("score", 0.0) + return metrics + except Exception as e: + return {{"combined_score": 0.0, "score": 0.0, "error": str(e)}} +""" return run_evolution( - initial_program=class_source, evaluator=evaluator, iterations=iterations, **kwargs + initial_program=class_source, evaluator=evaluator_code, iterations=iterations, **kwargs ) diff --git a/tests/test_api.py b/tests/test_api.py index db09d0bbf6..9316efc1d1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -116,17 +116,55 @@ def test_prepare_evaluator_from_callable(self): """Test _prepare_evaluator with callable function""" def my_evaluator(program_path): return {"score": 0.8, "test": "passed"} - + temp_files = [] result = _prepare_evaluator(my_evaluator, self.temp_dir, temp_files) - + self.assertTrue(os.path.exists(result)) self.assertEqual(len(temp_files), 1) - + with open(result, 'r') as f: content = f.read() self.assertIn("def evaluate(program_path)", content) - self.assertIn("user_evaluator", content) + self.assertIn("my_evaluator", content) + + def test_prepare_evaluator_callable_works_in_subprocess(self): + """Test that callable evaluator can be executed in a subprocess""" + import subprocess + import sys + + def my_evaluator(program_path): + return {"score": 0.8, "combined_score": 0.8} + + temp_files = [] + eval_file = _prepare_evaluator(my_evaluator, self.temp_dir, temp_files) + + # Write a dummy program file for the evaluator to receive + program_file = os.path.join(self.temp_dir, "dummy_program.py") + with open(program_file, 'w') as f: + f.write("x = 1\n") + + # Run the evaluator in a subprocess (simulating process-based parallelism) + test_script = os.path.join(self.temp_dir, "run_eval.py") + with open(test_script, 'w') as f: + f.write(f""" +import sys +import importlib.util +spec = importlib.util.spec_from_file_location("evaluator", {eval_file!r}) +mod = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mod) +result = mod.evaluate({program_file!r}) +assert isinstance(result, dict), f"Expected dict, got {{type(result)}}" +assert result["score"] == 0.8, f"Expected score 0.8, got {{result['score']}}" +print("OK") +""") + + proc = subprocess.run( + [sys.executable, test_script], + capture_output=True, text=True, timeout=10 + ) + self.assertEqual(proc.returncode, 0, f"Subprocess failed: {proc.stderr}") + self.assertIn("OK", proc.stdout) def test_prepare_evaluator_from_string(self): """Test _prepare_evaluator with code string""" @@ -159,12 +197,12 @@ def initial_sort(arr): if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j] return arr - + test_cases = [ ([3, 1, 2], [1, 2, 3]), ([5, 2], [2, 5]), ] - + # Mock the async controller to avoid actual evolution with unittest.mock.patch('openevolve.api._run_evolution_async') as mock_async: mock_async.return_value = EvolutionResult( @@ -174,12 +212,78 @@ def initial_sort(arr): metrics={"score": 1.0, "test_pass_rate": 1.0}, output_dir=None ) - + result = evolve_function(initial_sort, test_cases, iterations=1) - + self.assertIsInstance(result, EvolutionResult) self.assertEqual(result.best_score, 1.0) mock_async.assert_called_once() + + def test_evolve_function_evaluator_works_in_subprocess(self): + """Test that evolve_function generates an evaluator that works in a subprocess. + + This is a regression test for the bug where callable evaluators stored in + globals() could not be accessed by process-based worker subprocesses. + """ + import subprocess + import sys + + def bubble_sort(arr): + for i in range(len(arr)): + for j in range(len(arr) - 1): + if arr[j] > arr[j + 1]: + arr[j], arr[j + 1] = arr[j + 1], arr[j] + return arr + + test_cases = [([3, 1, 2], [1, 2, 3]), ([5, 2, 8], [2, 5, 8])] + + # Call evolve_function but intercept the evaluator code it generates + # by capturing what gets passed to run_evolution + with unittest.mock.patch('openevolve.api.run_evolution') as mock_run: + mock_run.return_value = EvolutionResult( + best_program=None, best_score=1.0, + best_code="", metrics={}, output_dir=None + ) + evolve_function(bubble_sort, test_cases, iterations=1) + + # Extract the evaluator code string passed to run_evolution + call_kwargs = mock_run.call_args + evaluator_code = call_kwargs.kwargs.get('evaluator') or call_kwargs[1].get('evaluator') + + self.assertIsInstance(evaluator_code, str, "evolve_function should pass evaluator as code string") + self.assertIn("def evaluate(program_path)", evaluator_code) + self.assertIn("combined_score", evaluator_code) + + # Write the evaluator to a file + eval_file = os.path.join(self.temp_dir, "eval_test.py") + with open(eval_file, 'w') as f: + f.write(evaluator_code) + + # Write a correct program for the evaluator to test + program_file = os.path.join(self.temp_dir, "program.py") + with open(program_file, 'w') as f: + f.write("def bubble_sort(arr):\n return sorted(arr)\n") + + # Run in a subprocess to verify it works across process boundaries + test_script = os.path.join(self.temp_dir, "run_eval.py") + with open(test_script, 'w') as f: + f.write(f""" +import importlib.util +spec = importlib.util.spec_from_file_location("evaluator", {eval_file!r}) +mod = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mod) +result = mod.evaluate({program_file!r}) +assert result["combined_score"] == 1.0, f"Expected 1.0, got {{result['combined_score']}}" +assert result["tests_passed"] == 2, f"Expected 2, got {{result['tests_passed']}}" +print("OK") +""") + + proc = subprocess.run( + [sys.executable, test_script], + capture_output=True, text=True, timeout=10 + ) + self.assertEqual(proc.returncode, 0, f"Subprocess failed: {proc.stderr}") + self.assertIn("OK", proc.stdout) def test_evolve_algorithm_basic(self): """Test evolve_algorithm with simple class"""