zai-org · Ricardo-M-L · Apr 14, 2026 · Apr 16, 2026
diff --git a/phone_agent/actions/handler.py b/phone_agent/actions/handler.py
@@ -348,7 +348,10 @@ def parse_action(response: str) -> dict[str, Any]:
         if response.startswith('do(action="Type"') or response.startswith(
             'do(action="Type_Name"'
         ):
-            text = response.split("text=", 1)[1][1:-2]
+            parts = response.split("text=", 1)
+            if len(parts) < 2:
+                raise ValueError(f"Missing 'text=' parameter in Type action: {response}")
+            text = parts[1][1:-2]
             action = {"_metadata": "do", "action": "Type", "text": text}
             return action
         elif response.startswith("do"):

diff --git a/phone_agent/adb/screenshot.py b/phone_agent/adb/screenshot.py
@@ -66,22 +66,25 @@ def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screensho
             return _create_fallback_screenshot(is_sensitive=False)
 
         # Read and encode image
-        img = Image.open(temp_path)
-        width, height = img.size
+        try:
+            img = Image.open(temp_path)
+            width, height = img.size
 
-        buffered = BytesIO()
-        img.save(buffered, format="PNG")
-        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            buffered = BytesIO()
+            img.save(buffered, format="PNG")
+            base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
-        # Cleanup
-        os.remove(temp_path)
-
-        return Screenshot(
-            base64_data=base64_data, width=width, height=height, is_sensitive=False
-        )
+            return Screenshot(
+                base64_data=base64_data, width=width, height=height, is_sensitive=False
+            )
+        finally:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
 
     except Exception as e:
         print(f"Screenshot error: {e}")
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
         return _create_fallback_screenshot(is_sensitive=False)
 
 

diff --git a/phone_agent/hdc/screenshot.py b/phone_agent/hdc/screenshot.py
@@ -82,22 +82,25 @@ def get_screenshot(device_id: str | None = None, timeout: int = 10) -> Screensho
 
         # Read JPEG image and convert to PNG for model inference
         # PIL automatically detects the image format from file content
-        img = Image.open(temp_path)
-        width, height = img.size
+        try:
+            img = Image.open(temp_path)
+            width, height = img.size
 
-        buffered = BytesIO()
-        img.save(buffered, format="PNG")
-        base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            buffered = BytesIO()
+            img.save(buffered, format="PNG")
+            base64_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
-        # Cleanup
-        os.remove(temp_path)
-
-        return Screenshot(
-            base64_data=base64_data, width=width, height=height, is_sensitive=False
-        )
+            return Screenshot(
+                base64_data=base64_data, width=width, height=height, is_sensitive=False
+            )
+        finally:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
 
     except Exception as e:
         print(f"Screenshot error: {e}")
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
         return _create_fallback_screenshot(is_sensitive=False)