diff --git a/phone_agent/actions/handler.py b/phone_agent/actions/handler.py index 0bef1c3a..25c83255 100644 --- a/phone_agent/actions/handler.py +++ b/phone_agent/actions/handler.py @@ -348,7 +348,10 @@ def parse_action(response: str) -> dict[str, Any]: if response.startswith('do(action="Type"') or response.startswith( 'do(action="Type_Name"' ): - text = response.split("text=", 1)[1][1:-2] + parts = response.split("text=", 1) + if len(parts) < 2: + raise ValueError(f"Missing 'text=' parameter in Type action: {response}") + text = parts[1][1:-2] action = {"_metadata": "do", "action": "Type", "text": text} return action elif response.startswith("do"): diff --git a/phone_agent/adb/screenshot.py b/phone_agent/adb/screenshot.py index bdc5b092..394eb315 100644 --- a/phone_agent/adb/screenshot.py +++ b/phone_agent/adb/screenshot.py @@ -66,22 +66,25 @@ def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screensho return _create_fallback_screenshot(is_sensitive=False) # Read and encode image - img = Image.open(temp_path) - width, height = img.size + try: + img = Image.open(temp_path) + width, height = img.size - buffered = BytesIO() - img.save(buffered, format="PNG") - base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") + buffered = BytesIO() + img.save(buffered, format="PNG") + base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") - # Cleanup - os.remove(temp_path) - - return Screenshot( - base64_data=base64_data, width=width, height=height, is_sensitive=False - ) + return Screenshot( + base64_data=base64_data, width=width, height=height, is_sensitive=False + ) + finally: + if os.path.exists(temp_path): + os.remove(temp_path) except Exception as e: print(f"Screenshot error: {e}") + if os.path.exists(temp_path): + os.remove(temp_path) return _create_fallback_screenshot(is_sensitive=False) diff --git a/phone_agent/hdc/screenshot.py b/phone_agent/hdc/screenshot.py index 332d198c..f8ecc644 100644 --- a/phone_agent/hdc/screenshot.py +++ b/phone_agent/hdc/screenshot.py @@ -82,22 +82,25 @@ def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screensho # Read JPEG image and convert to PNG for model inference # PIL automatically detects the image format from file content - img = Image.open(temp_path) - width, height = img.size + try: + img = Image.open(temp_path) + width, height = img.size - buffered = BytesIO() - img.save(buffered, format="PNG") - base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") + buffered = BytesIO() + img.save(buffered, format="PNG") + base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") - # Cleanup - os.remove(temp_path) - - return Screenshot( - base64_data=base64_data, width=width, height=height, is_sensitive=False - ) + return Screenshot( + base64_data=base64_data, width=width, height=height, is_sensitive=False + ) + finally: + if os.path.exists(temp_path): + os.remove(temp_path) except Exception as e: print(f"Screenshot error: {e}") + if os.path.exists(temp_path): + os.remove(temp_path) return _create_fallback_screenshot(is_sensitive=False)