class ActionBlocklist(BaseModel):
    """Blocklist policy for browser actions.

    Attributes:
        disallow_types: Action types to block (e.g. ``"goto"``, ``"click"``).
            Matching is an exact membership test on ``action.type``.
        keywords: Keywords that block an interaction action when they occur in
            the text gathered from its target element. Matching is a
            case-insensitive substring test; blank keywords are ignored.
    """

    disallow_types: set[str] = Field(default_factory=set)
    keywords: list[str] = Field(default_factory=list)

    def is_type_blocked(self, action: BaseAction) -> bool:
        """Return True when ``action.type`` is listed in ``disallow_types``."""
        return action.type in self.disallow_types

    def is_keyword_blocked(self, action: BaseAction, prev_snapshot: BrowserSnapshot | None) -> bool:
        """Return True when the action targets an element whose text hits a keyword.

        Only ``InteractionAction`` instances with a non-empty ``id`` are checked,
        and only when a previous snapshot is available to resolve that id.

        Args:
            action: The action about to be executed.
            prev_snapshot: Snapshot whose ``dom_node`` tree is searched for the
                action's target element; ``None`` disables the keyword check.

        Returns:
            True if any non-blank keyword is a case-insensitive substring of the
            element's aggregated text, False otherwise (including when the
            element cannot be found).
        """
        if prev_snapshot is None or not isinstance(action, InteractionAction):
            return False
        if not action.id:
            return False

        # A blank keyword is a substring of every string and would block every
        # interaction, so such entries are dropped. Normalise once, up front.
        needles = [kw.casefold() for kw in self.keywords if kw.strip()]
        if not needles:
            return False

        node = prev_snapshot.dom_node.find(action.id)
        if node is None:
            return False

        # Gather every potential text source for the element; non-string values
        # are filtered out before joining so the join can never raise.
        raw_values: list[object] = []
        try:
            raw_values.append(node.inner_text())
        except Exception:
            # Best effort: a failure to compute the inner text must not prevent
            # the remaining sources from being checked.
            # NOTE(review): this fails open for that one source - confirm that
            # is the intended policy.
            pass
        raw_values.append(node.text)
        if node.attributes is not None:
            raw_values.extend(
                getattr(node.attributes, key, None) for key in ("title", "aria_label", "name", "placeholder")
            )

        haystack = " ".join(value for value in raw_values if isinstance(value, str)).casefold()
        return any(needle in haystack for needle in needles)

    def is_blocked(self, action: BaseAction, prev_snapshot: BrowserSnapshot | None) -> bool:
        """Return True when the action is blocked by type or by element keyword."""
        return self.is_type_blocked(action) or self.is_keyword_blocked(action, prev_snapshot)
None, keep_alive: bool = False, + blocklist: ActionBlocklist | None = None, **data: Unpack[SessionStartRequestDict], ) -> None: self._request: SessionStartRequest = SessionStartRequest.model_validate(data) @@ -121,7 +122,9 @@ def __init__( raise CaptchaSolverNotAvailableError() self.screenshot_type: ScreenshotType = self._request.screenshot_type self._window: BrowserWindow | None = window - self.controller: BrowserController = BrowserController(verbose=config.verbose, storage=storage) + self.controller: BrowserController = BrowserController( + verbose=config.verbose, storage=storage, blocklist=blocklist + ) self.storage: BaseStorage | None = storage llmserve = LLMService.from_config(perception_type=perception_type) self._action_space_pipe: MainActionSpacePipe = MainActionSpacePipe(llmserve=llmserve) diff --git a/packages/notte-core/src/notte_core/browser/highlighter.py b/packages/notte-core/src/notte_core/browser/highlighter.py index c16a952b5..56204e528 100644 --- a/packages/notte-core/src/notte_core/browser/highlighter.py +++ b/packages/notte-core/src/notte_core/browser/highlighter.py @@ -1,7 +1,8 @@ import io +from collections.abc import Iterable from dataclasses import dataclass from enum import Enum -from typing import ClassVar +from typing import Any, ClassVar, cast from PIL import Image, ImageDraw, ImageFont from pydantic import BaseModel @@ -141,8 +142,13 @@ def is_area_uniform_color(image: Image.Image, rect: Rectangle, config: LabelConf if region.mode != "RGB": region = region.convert("RGB") - # Get pixel data - pixels: list[list[int]] = list(region.getdata()) # pyright: ignore [reportUnknownArgumentType, reportUnknownMemberType] + # Get pixel data. + # Note: Pillow's Image.getdata() returns an internal ImagingCore type whose + # typing is not mode-aware; stubs expose it with unknown member types. + # For RGB images, elements are 3-tuples[int, int, int]. We cast via Any + # and add a targeted ignore to satisfy the type checker without changing behavior. 
+ data_any: Any = region.getdata() # pyright: ignore[reportUnknownMemberType] + pixels: list[tuple[int, int, int]] = list(cast(Iterable[tuple[int, int, int]], data_any)) if not pixels: return True diff --git a/packages/notte-eval/src/notte_eval/data/webvoyager/convert.py b/packages/notte-eval/src/notte_eval/data/webvoyager/convert.py index bf5489fc0..dd0d8da93 100644 --- a/packages/notte-eval/src/notte_eval/data/webvoyager/convert.py +++ b/packages/notte-eval/src/notte_eval/data/webvoyager/convert.py @@ -20,4 +20,4 @@ ) merged["id"] = "webvoyager--" + merged["id"] -merged.to_json("output.jsonl", orient="records", lines=True) # type: ignore +merged.to_json("output.jsonl", orient="records", lines=True) # pyright: ignore[reportUnknownMemberType] diff --git a/tests/browser/test_blocklist.py b/tests/browser/test_blocklist.py new file mode 100644 index 000000000..8843ac8b4 --- /dev/null +++ b/tests/browser/test_blocklist.py @@ -0,0 +1,59 @@ +from notte_browser.controller import ActionBlocklist +from notte_core.actions import ClickAction, GotoAction +from notte_core.browser.dom_tree import ComputedDomAttributes, DomAttributes, DomNode, NodeRole, NodeType +from notte_core.browser.snapshot import BrowserSnapshot, SnapshotMetadata, TabsData, ViewportData + + +def make_snapshot_with_button(button_id: str, text: str) -> BrowserSnapshot: + # Minimal DOM tree with one interactive node + attrs = DomAttributes.safe_init(tag_name="button", title=text, name=text) + computed = ComputedDomAttributes() + node = DomNode( + id=button_id, + type=NodeType.INTERACTION, + role=NodeRole.BUTTON, + text=text, + children=[], + attributes=attrs, + computed_attributes=computed, + ) + + vp = ViewportData( + scroll_x=0, scroll_y=0, viewport_width=800, viewport_height=600, total_width=800, total_height=600 + ) + tabs = [TabsData(tab_id=0, title="Test", url="https://example.com")] + meta = SnapshotMetadata(title="Test", url="https://example.com", viewport=vp, tabs=tabs) + return 
def pytest_collection_modifyitems(config, items):
    """Skip tests located under ``tests/integration`` unless explicitly enabled.

    Integration tests run only when the ``NOTTE_RUN_INTEGRATION`` environment
    variable is set to ``1``, ``true`` or ``yes`` (case-insensitive, surrounding
    whitespace ignored). No fork detection happens here; the decision depends
    solely on that variable.

    Args:
        config: The pytest config object (unused).
        items: Collected test items; matching items get a ``skip`` marker added
            in place.
    """
    enabled = os.getenv("NOTTE_RUN_INTEGRATION", "").strip().lower() in {"1", "true", "yes"}
    if enabled:
        return

    skip_integration = pytest.mark.skip(reason="Integration tests are disabled unless NOTTE_RUN_INTEGRATION=1")
    for item in items:
        # Prefer the pathlib-based `path` attribute and fall back to the legacy
        # `fspath` for items that only expose the latter.
        location = getattr(item, "path", None) or getattr(item, "fspath", "")
        # Normalise separators so the folder check also matches on platforms
        # whose native separator is not "/".
        posix_path = str(location).replace(os.sep, "/")
        # Only skip tests under the integration folder
        if "/tests/integration/" in posix_path or posix_path.endswith("/tests/integration"):
            item.add_marker(skip_integration)