Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions packages/notte-browser/src/notte_browser/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from notte_core.storage import BaseStorage
from notte_core.utils.code import text_contains_tabs
from notte_core.utils.platform import platform_control_key
from pydantic import BaseModel, Field
from typing_extensions import final

from notte_browser.captcha import CaptchaHandler
Expand All @@ -59,15 +60,70 @@
from notte_browser.window import BrowserWindow


class ActionBlocklist(BaseModel):
    """Blocklist policy for browser actions.

    - disallow_types: exact action types to block (e.g., "goto", "click")
    - keywords: element text keywords to block for interaction actions (case-insensitive)

    Blank (empty or whitespace-only) keywords are ignored: an empty string is a
    substring of every text, so honouring it would block every interaction action.
    """

    disallow_types: set[str] = Field(default_factory=set)
    keywords: list[str] = Field(default_factory=list)

    def is_type_blocked(self, action: BaseAction) -> bool:
        """Return True when the action's type is listed in `disallow_types`."""
        return action.type in self.disallow_types

    def is_keyword_blocked(self, action: BaseAction, prev_snapshot: BrowserSnapshot | None) -> bool:
        """Return True when the element targeted by `action` mentions a blocked keyword.

        Only `InteractionAction` instances with a non-empty id are inspected, and only
        when `prev_snapshot` is available and contains a node with that id; in every
        other case the action is not keyword-blocked.

        The match is a case-insensitive substring search over the node's inner text,
        its `text`, and its `title`, `aria_label`, `name` and `placeholder` attributes.
        """
        # Drop blank keywords up front; without any usable keyword there is nothing
        # to match, so the DOM lookup can be skipped entirely.
        needles = [kw.lower() for kw in self.keywords if kw.strip()]
        if not needles:
            return False
        if prev_snapshot is None or not isinstance(action, InteractionAction):
            return False
        if not action.id:
            return False
        node = prev_snapshot.dom_node.find(action.id)
        if node is None:
            return False

        # Aggregate potential text sources for matching.
        candidates: list[object] = []
        try:
            candidates.append(node.inner_text())
        except Exception:
            # Deliberate best effort: if the inner text cannot be computed, the
            # remaining sources below are still checked.
            pass
        candidates.append(node.text)
        attrs = node.attributes
        if attrs is not None:
            candidates.extend(getattr(attrs, key, None) for key in ("title", "aria_label", "name", "placeholder"))

        # Keep strings only so a missing / non-text source cannot break the join.
        haystack = " ".join(value for value in candidates if isinstance(value, str)).lower()
        return any(needle in haystack for needle in needles)

    def is_blocked(self, action: BaseAction, prev_snapshot: BrowserSnapshot | None) -> bool:
        """Return True when the action is blocked by type or by element keyword."""
        return self.is_type_blocked(action) or self.is_keyword_blocked(action, prev_snapshot)


@final
class BrowserController:
def __init__(
    self,
    verbose: bool,
    storage: BaseStorage | None = None,
    blocklist: ActionBlocklist | None = None,
) -> None:
    """Store the controller configuration.

    Args:
        verbose: Verbosity flag kept on the instance.
        storage: Optional storage backend kept on the instance.
        blocklist: Optional action policy; when set, `execute` checks it before
            running an action.
    """
    self.blocklist: ActionBlocklist | None = blocklist
    self.storage: BaseStorage | None = storage
    self.verbose: bool = verbose

async def switch_tab(self, window: BrowserWindow, tab_index: int) -> None:
context = window.page.context
Expand Down Expand Up @@ -373,6 +429,16 @@ async def execute(
context = window.page.context
num_pages = len(context.pages)
retval = True

# Enforce blocklist policy before execution
if self.blocklist is not None and self.blocklist.is_blocked(action, prev_snapshot):
# Raise an explicit error handled by session
raise ActionExecutionError(
action_id=getattr(action, "id", action.type),
url=window.page.url,
reason="Unauthorized action (blocked by policy)",
)

match action:
case InteractionAction():
retval = await self.execute_interaction_action(window, action, prev_snapshot)
Expand Down
7 changes: 5 additions & 2 deletions packages/notte-browser/src/notte_browser/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@

from notte_browser.action_selection.pipe import ActionSelectionPipe
from notte_browser.captcha import CaptchaHandler
from notte_browser.controller import BrowserController
from notte_browser.controller import ActionBlocklist, BrowserController
from notte_browser.dom.locate import locate_element
from notte_browser.errors import (
BrowserNotStartedError,
Expand Down Expand Up @@ -114,14 +114,17 @@ def __init__(
tools: list[BaseTool] | None = None,
window: BrowserWindow | None = None,
keep_alive: bool = False,
blocklist: ActionBlocklist | None = None,
**data: Unpack[SessionStartRequestDict],
) -> None:
self._request: SessionStartRequest = SessionStartRequest.model_validate(data)
if self._request.solve_captchas and not CaptchaHandler.is_available:
raise CaptchaSolverNotAvailableError()
self.screenshot_type: ScreenshotType = self._request.screenshot_type
self._window: BrowserWindow | None = window
self.controller: BrowserController = BrowserController(verbose=config.verbose, storage=storage)
self.controller: BrowserController = BrowserController(
verbose=config.verbose, storage=storage, blocklist=blocklist
)
self.storage: BaseStorage | None = storage
llmserve = LLMService.from_config(perception_type=perception_type)
self._action_space_pipe: MainActionSpacePipe = MainActionSpacePipe(llmserve=llmserve)
Expand Down
12 changes: 9 additions & 3 deletions packages/notte-core/src/notte_core/browser/highlighter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import io
from collections.abc import Iterable
from dataclasses import dataclass
from enum import Enum
from typing import ClassVar
from typing import Any, ClassVar, cast

from PIL import Image, ImageDraw, ImageFont
from pydantic import BaseModel
Expand Down Expand Up @@ -141,8 +142,13 @@ def is_area_uniform_color(image: Image.Image, rect: Rectangle, config: LabelConf
if region.mode != "RGB":
region = region.convert("RGB")

# Get pixel data
pixels: list[list[int]] = list(region.getdata()) # pyright: ignore [reportUnknownArgumentType, reportUnknownMemberType]
# Get pixel data.
# Note: Pillow's Image.getdata() returns an internal ImagingCore type whose
# typing is not mode-aware; stubs expose it with unknown member types.
# For RGB images, elements are 3-tuples[int, int, int]. We cast via Any
# and add a targeted ignore to satisfy the type checker without changing behavior.
data_any: Any = region.getdata() # pyright: ignore[reportUnknownMemberType]
pixels: list[tuple[int, int, int]] = list(cast(Iterable[tuple[int, int, int]], data_any))
if not pixels:
return True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
)
merged["id"] = "webvoyager--" + merged["id"]

merged.to_json("output.jsonl", orient="records", lines=True) # type: ignore
merged.to_json("output.jsonl", orient="records", lines=True) # pyright: ignore[reportUnknownMemberType]
59 changes: 59 additions & 0 deletions tests/browser/test_blocklist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from notte_browser.controller import ActionBlocklist
from notte_core.actions import ClickAction, GotoAction
from notte_core.browser.dom_tree import ComputedDomAttributes, DomAttributes, DomNode, NodeRole, NodeType
from notte_core.browser.snapshot import BrowserSnapshot, SnapshotMetadata, TabsData, ViewportData


def make_snapshot_with_button(button_id: str, text: str) -> BrowserSnapshot:
    """Build a snapshot whose DOM is a single button node.

    `text` is used as the node text and as its `title` and `name` attributes.
    """
    button = DomNode(
        id=button_id,
        type=NodeType.INTERACTION,
        role=NodeRole.BUTTON,
        text=text,
        children=[],
        attributes=DomAttributes.safe_init(tag_name="button", title=text, name=text),
        computed_attributes=ComputedDomAttributes(),
    )

    viewport = ViewportData(
        scroll_x=0,
        scroll_y=0,
        viewport_width=800,
        viewport_height=600,
        total_width=800,
        total_height=600,
    )
    metadata = SnapshotMetadata(
        title="Test",
        url="https://example.com",
        viewport=viewport,
        tabs=[TabsData(tab_id=0, title="Test", url="https://example.com")],
    )
    return BrowserSnapshot(
        metadata=metadata,
        html_content="<html></html>",
        a11y_tree=None,
        dom_node=button,
        screenshot=b"",
    )


def test_blocklist_blocks_action_type():
    """A disallowed action type is blocked even when no snapshot is given."""
    policy = ActionBlocklist(disallow_types={"goto"})
    goto = GotoAction(url="https://example.com")
    blocked = policy.is_blocked(goto, prev_snapshot=None)
    assert blocked is True


def test_blocklist_allows_other_action_type():
    """An action whose type is not in `disallow_types` is allowed."""
    policy = ActionBlocklist(disallow_types={"goto"})
    click = ClickAction(id="B1")
    blocked = policy.is_blocked(click, prev_snapshot=None)
    assert blocked is False


def test_blocklist_blocks_keyword_in_interaction_text():
    """Clicking an element whose text contains a blocked keyword is blocked."""
    policy = ActionBlocklist(keywords=["delete"])
    snapshot = make_snapshot_with_button("B1", text="Delete account")
    click = ClickAction(id="B1")
    blocked = policy.is_blocked(click, prev_snapshot=snapshot)
    assert blocked is True


def test_blocklist_keyword_case_insensitive():
    """A mixed-case keyword still matches lower-case element text."""
    policy = ActionBlocklist(keywords=["DeLeTe"])
    snapshot = make_snapshot_with_button("B1", text="delete item")
    click = ClickAction(id="B1")
    blocked = policy.is_blocked(click, prev_snapshot=snapshot)
    assert blocked is True


def test_blocklist_keyword_non_match():
    """An element whose text lacks every keyword is not blocked."""
    policy = ActionBlocklist(keywords=["delete"])
    snapshot = make_snapshot_with_button("B1", text="Save changes")
    click = ClickAction(id="B1")
    blocked = policy.is_blocked(click, prev_snapshot=snapshot)
    assert blocked is False
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import notte_core
import pytest

CONFIG_PATH = Path(__file__).parent / "test_notte_config.toml"
notte_core.set_error_mode("developer")
Expand Down Expand Up @@ -49,3 +50,17 @@ def pytest_generate_tests(metafunc):
# Apply parameterization only if any matching arguments exist
if params:
metafunc.parametrize(",".join(params.keys()), [next(iter(params.values()))])


# Skip heavy integration tests unless they are explicitly enabled via NOTTE_RUN_INTEGRATION
def pytest_collection_modifyitems(config, items):
    """Mark tests located under ``tests/integration`` as skipped unless opted in.

    Nothing is changed when the ``NOTTE_RUN_INTEGRATION`` environment variable is
    set to ``1``, ``true`` or ``yes`` (case-insensitive, surrounding whitespace
    ignored). Otherwise every collected item whose file lives under a
    ``tests/integration`` directory receives a ``skip`` marker.

    Args:
        config: The pytest config object (unused).
        items: The collected test items; matching items are marked in place.
    """
    flag = os.getenv("NOTTE_RUN_INTEGRATION", "").strip().lower()
    if flag in {"1", "true", "yes"}:
        return

    skip_integration = pytest.mark.skip(reason="Integration tests are disabled unless NOTTE_RUN_INTEGRATION=1")
    for item in items:
        # Prefer the pathlib-based `path` attribute; fall back to the legacy `fspath`.
        location = getattr(item, "path", None) or getattr(item, "fspath", "")
        # Normalise Windows separators so the match below works on every platform.
        normalized = str(location).replace("\\", "/")
        # Only skip tests under the integration folder
        if "/tests/integration/" in normalized or normalized.endswith("/tests/integration"):
            item.add_marker(skip_integration)