diff --git a/crawl4ai/async_configs.py b/crawl4ai/async_configs.py index 44d3040b5..b62f6edc8 100644 --- a/crawl4ai/async_configs.py +++ b/crawl4ai/async_configs.py @@ -586,6 +586,10 @@ class BrowserConfig: Default: []. enable_stealth (bool): If True, applies playwright-stealth to bypass basic bot detection. Cannot be used with use_undetected browser mode. Default: False. + no_sandbox (bool): If True (default), passes --no-sandbox to the browser. Set to False to + enable Chromium's sandbox protections, which is required by some enterprise + security policies. Note: running with sandbox enabled requires proper OS-level + setup (e.g., user namespaces on Linux). Default: True. memory_saving_mode (bool): If True, adds aggressive cache discard and V8 heap cap flags to reduce Chromium memory growth. Recommended for high-volume crawling (1000+ pages). May slightly reduce performance due to @@ -650,6 +654,7 @@ def __init__( avoid_ads: bool = False, avoid_css: bool = False, init_scripts: List[str] = None, + no_sandbox: bool = True, memory_saving_mode: bool = False, max_pages_before_recycle: int = 0, ): @@ -717,6 +722,7 @@ def __init__( self.avoid_ads = avoid_ads self.avoid_css = avoid_css self.init_scripts = init_scripts if init_scripts is not None else [] + self.no_sandbox = no_sandbox self.memory_saving_mode = memory_saving_mode self.max_pages_before_recycle = max_pages_before_recycle @@ -812,6 +818,7 @@ def to_dict(self): "avoid_ads": self.avoid_ads, "avoid_css": self.avoid_css, "init_scripts": self.init_scripts, + "no_sandbox": self.no_sandbox, "memory_saving_mode": self.memory_saving_mode, "max_pages_before_recycle": self.max_pages_before_recycle, } diff --git a/crawl4ai/browser_manager.py b/crawl4ai/browser_manager.py index 0b429c34d..d1f63612b 100644 --- a/crawl4ai/browser_manager.py +++ b/crawl4ai/browser_manager.py @@ -70,7 +70,6 @@ class ManagedBrowser: def build_browser_flags(config: BrowserConfig) -> List[str]: """Common CLI flags for launching Chromium""" flags = [ - "--no-sandbox", "--disable-dev-shm-usage", "--no-first-run", "--no-default-browser-check", @@ -90,6 +89,8 @@ def build_browser_flags(config: BrowserConfig) -> List[str]: "--disable-component-update", "--disable-domain-reliability", ] + if config.no_sandbox: + flags.append("--no-sandbox") # GPU flags disable WebGL which anti-bot sensors detect as headless. # Keep WebGL working (via SwiftShader) when stealth mode is active. if not config.enable_stealth: @@ -1060,7 +1061,6 @@ def _build_browser_args(self) -> dict: "--disable-gpu", "--disable-gpu-compositing", "--disable-software-rasterizer", - "--no-sandbox", "--disable-dev-shm-usage", "--no-first-run", "--no-default-browser-check", @@ -1083,6 +1083,9 @@ def _build_browser_args(self) -> dict: f"--window-size={self.config.viewport_width},{self.config.viewport_height}", ] + if self.config.no_sandbox: + args.append("--no-sandbox") + if self.config.memory_saving_mode: args.extend([ "--aggressive-cache-discard",