diff --git a/DockerfileLocal b/DockerfileLocal
index f934d97498..c7a92be406 100644
--- a/DockerfileLocal
+++ b/DockerfileLocal
@@ -33,4 +33,4 @@ EXPOSE 22 80 9000-9009
 RUN chmod +x /exe/initialize.sh /exe/run_A0.sh /exe/run_searxng.sh /exe/run_tunnel_api.sh
 
 # initialize runtime and switch to supervisord
-CMD ["/exe/initialize.sh", "$BRANCH"]
+CMD ["/exe/initialize.sh", "$BRANCH"]
\ No newline at end of file
diff --git a/agent.py b/agent.py
index 594dc37bc5..4f1ea4863d 100644
--- a/agent.py
+++ b/agent.py
@@ -275,7 +275,6 @@ class AgentConfig:
     chat_model: models.ModelConfig
     utility_model: models.ModelConfig
     embeddings_model: models.ModelConfig
-    browser_model: models.ModelConfig
     mcp_servers: str
     profile: str = ""
     memory_subdir: str = ""
@@ -287,7 +286,12 @@ class AgentConfig:
     code_exec_ssh_user: str = "root"
     code_exec_ssh_pass: str = ""
     additional: Dict[str, Any] = field(default_factory=dict)
-
+    browser_control_headless: bool = False  # Browser GUI enabled for interaction (uses VNC if available, otherwise X11 forwarding)
+    browser_control_cdp_url: str = ""  # Chrome DevTools Protocol URL for native browser (e.g., "ws://host.docker.internal:9222/devtools/browser/..."), leave empty to use embedded browser with VNC
+    browser_control_start_url: str = "https://www.google.com"
+    browser_control_timeout: int = 5000 # milliseconds
+    # VNC is automatically enabled if available (configured in docker-compose.yml)
+    # Access browser control via noVNC when agent calls pause_for_user method
 
 @dataclass
 class UserMessage:
@@ -676,14 +680,6 @@ def get_utility_model(self):
             **self.config.utility_model.build_kwargs(),
         )
 
-    def get_browser_model(self):
-        return models.get_browser_model(
-            self.config.browser_model.provider,
-            self.config.browser_model.name,
-            model_config=self.config.browser_model,
-            **self.config.browser_model.build_kwargs(),
-        )
-
     def get_embedding_model(self):
         return models.get_embedding_model(
             self.config.embeddings_model.provider,
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 7e94ed80a5..79dfaab174 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -27,6 +27,12 @@ RUN bash /ins/install_base_packages4.sh
 # install python after packages to ensure version overriding
 RUN bash /ins/install_python.sh
 
+# install X11 support for browser display
+RUN bash /ins/install_x11_support.sh
+
+# install VNC server and noVNC for remote browser control
+RUN bash /ins/install_vnc.sh
+
 # install searxng
 RUN bash /ins/install_searxng.sh
 
diff --git a/docker/run/docker-compose.yml b/docker/run/docker-compose.yml
index cc48f3f1ba..a90a70f71d 100644
--- a/docker/run/docker-compose.yml
+++ b/docker/run/docker-compose.yml
@@ -1,8 +1,53 @@
 services:
   agent-zero:
     container_name: agent-zero
-    image: agent0ai/agent-zero:latest
+    # Use local development image (build with: docker build -f DockerfileLocal -t agent-zero-local --build-arg CACHE_DATE=$(date +%Y-%m-%d:%H:%M:%S) .)
+    image: agent-zero-local
+    # Use Docker Hub image for production deployments
+    # image: agent0ai/agent-zero:latest
     volumes:
-      - ./agent-zero:/a0
+      # Mount the actual project root (not the outdated copy in ./agent-zero)
+      # This allows live development - changes reflected immediately without rebuild
+      - ../..:/a0
+      # X11 socket for GUI display on macOS (auto-configured)
+      - /tmp/.X11-unix:/tmp/.X11-unix:rw
     ports:
-      - "50080:80"
\ No newline at end of file
+      - "55022:22"
+      - "50080:80"
+      - "56080:6080"  # noVNC web client for browser control
+      - "50090:9000"
+      - "50091:9001"
+      - "50092:9002"
+      - "50093:9003"
+      - "50094:9004"
+      - "50095:9005"
+      - "50096:9006"
+      - "50097:9007"
+      - "50098:9008"
+      - "50099:9009"
+    environment:
+      # X11 display forwarding via TCP (Docker Desktop on macOS uses VM)
+      - DISPLAY=host.docker.internal:0
+      - XAUTHORITY=/tmp/.Xauthority
+      # VNC configuration for remote browser control
+      - VNC_DISPLAY=:99
+      - VNC_RESOLUTION=1920x1080x24
+      - VNC_PORT=5900
+      - NOVNC_PORT=6080
+      - NOVNC_EXTERNAL_PORT=56080  # External port mapping for noVNC access
+      - VNC_PASSWORD=${VNC_PASSWORD:-agent-zero}  # taken from host env/.env when set; the fallback is a publicly known default
+    # Allow container to reach host for X11
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    # Security options for X11
+    security_opt:
+      - seccomp:unconfined
+    # Shared memory for browser (required for Chromium)
+    shm_size: '2gb'
+    # Auto-check and setup display and VNC on startup
+    command: >
+      bash -c "
+        /exe/check_display.sh || true &&
+        /exe/start_vnc.sh || true &&
+        /exe/initialize.sh development
+      "
\ No newline at end of file
diff --git a/docker/run/fs/etc/supervisor/conf.d/vnc.conf b/docker/run/fs/etc/supervisor/conf.d/vnc.conf
new file mode 100644
index 0000000000..e20409b2da
--- /dev/null
+++ b/docker/run/fs/etc/supervisor/conf.d/vnc.conf
@@ -0,0 +1,13 @@
+[program:run_vnc]
+command=/exe/start_vnc.sh
+environment=
+user=root
+stopwaitsecs=10
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+autorestart=false ; start_vnc.sh is a one-shot launcher: it backgrounds Xvfb/x11vnc/websockify and exits 0
+startsecs=0 ; a quick exit is the normal outcome, so do not treat it as a failed start
+stopasgroup=true
+killasgroup=true
diff --git a/docker/run/fs/exe/check_display.sh b/docker/run/fs/exe/check_display.sh
new file mode 100755
index 0000000000..d0b2e7813a
--- /dev/null
+++ b/docker/run/fs/exe/check_display.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# Automatic X11 display setup checker
+# Runs on container startup to verify display forwarding
+# No user interaction required - fully automatic
+
+set -e
+
+echo "========================================"
+echo "Agent Zero - Display Setup Check"
+echo "========================================"
+
+# Heuristic for a macOS host: the X11 socket directory exists, or DISPLAY is the Docker Desktop TCP address
+IS_MACOS=false
+if [ -d /tmp/.X11-unix ] || [ "$DISPLAY" = "host.docker.internal:0" ]; then
+    IS_MACOS=true
+fi
+
+# Check if DISPLAY is set
+if [ -z "$DISPLAY" ]; then
+    echo "⚠️  No display configured (headless mode)"
+    echo "   Browser will run in headless mode (invisible)"
+    echo ""
+    echo "To enable visible browser on macOS:"
+    echo "  1. Install XQuartz: https://www.xquartz.org/"
+    echo "  2. Start XQuartz and restart Agent Zero"
+    exit 0
+fi
+
+# Display is configured - verify X11 libraries
+echo "✓ Display configured: $DISPLAY"
+
+# Check if X11 libraries are installed
+if ! dpkg -l | grep -q libx11-6; then
+    echo "Installing X11 libraries for browser display..."
+    apt-get update -qq
+    DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
+        libx11-6 libxcb1 libxcomposite1 libxcursor1 libxdamage1 \
+        libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 \
+        libxtst6 libgbm1 libasound2 libatk1.0-0 libatk-bridge2.0-0 \
+        libcups2 libdrm2 libgtk-3-0 libnspr4 libnss3 \
+        2>&1 | grep -v "^Reading" | grep -v "^Building" || true
+fi
+
+echo "✓ X11 libraries installed"
+
+# Test X11 connection
+if [ "$IS_MACOS" = true ]; then
+    echo "Testing X11 connection to macOS host..."
+
+    # Try to connect to X11
+    timeout 2 xdpyinfo -display "$DISPLAY" > /dev/null 2>&1 && {
+        echo "✓ X11 connection successful"
+        echo "✓ Browser will appear on your screen"
+        exit 0
+    } || {
+        echo ""
+        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+        echo "⚠️  Cannot connect to X11 display"
+        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+        echo ""
+        echo "To see the browser window, you need XQuartz:"
+        echo ""
+        echo "  1. Download and install XQuartz:"
+        echo "     https://www.xquartz.org/"
+        echo ""
+        echo "  2. Log out and log back in (required!)"
+        echo ""
+        echo "  3. Allow Docker connections:"
+        echo "     xhost +localhost"
+        echo ""
+        echo "  4. Restart Agent Zero:"
+        echo "     cd docker/run && docker-compose restart"
+        echo ""
+        echo "For now, browser will run in headless mode."
+        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+        echo ""
+        exit 0
+    }
+fi
+
+echo "✓ Display setup complete"
+echo "========================================"
diff --git a/docker/run/fs/exe/initialize.sh b/docker/run/fs/exe/initialize.sh
index 8c329bb304..ba6b5d9cc3 100644
--- a/docker/run/fs/exe/initialize.sh
+++ b/docker/run/fs/exe/initialize.sh
@@ -19,5 +19,8 @@ chmod 444 /root/.profile
 # update package list to save time later
 apt-get update > /dev/null 2>&1 &
 
+# Start VNC server in the background (for browser control feature)
+/exe/start_vnc.sh > /tmp/vnc_startup.log 2>&1 &
+
 # let supervisord handle the services
 exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
diff --git a/docker/run/fs/exe/start_vnc.sh b/docker/run/fs/exe/start_vnc.sh
new file mode 100755
index 0000000000..906e5f43dc
--- /dev/null
+++ b/docker/run/fs/exe/start_vnc.sh
@@ -0,0 +1,203 @@
+#!/bin/bash
+# VNC Server Startup Script
+# Starts Xvfb, x11vnc, and noVNC for remote browser control
+# Can be safely run multiple times (idempotent)
+
+set -e
+
+echo "========================================"
+echo "Agent Zero - VNC Server Setup"
+echo "========================================"
+
+# Configuration from environment variables with defaults
+VNC_DISPLAY="${VNC_DISPLAY:-:99}"
+VNC_RESOLUTION="${VNC_RESOLUTION:-1920x1080x24}"
+VNC_PORT="${VNC_PORT:-5900}"
+NOVNC_PORT="${NOVNC_PORT:-6080}"
+VNC_PASSWORD="${VNC_PASSWORD:-agent-zero}"
+
+# Extract display number (e.g., :99 -> 99)
+DISPLAY_NUM=$(echo $VNC_DISPLAY | tr -d ':')
+
+echo "Configuration:"
+echo "  Display: $VNC_DISPLAY"
+echo "  Resolution: $VNC_RESOLUTION"
+echo "  VNC Port: $VNC_PORT"
+echo "  noVNC Port: $NOVNC_PORT"
+echo "========================================"
+
+# Succeed (exit status 0) when "pgrep -f" finds a process matching pattern $1; all output is discarded
+is_running() {
+    pgrep -f "$1" > /dev/null 2>&1
+}
+
+# Kill any Xvfb/x11vnc/websockify matching this display/port, then remove the display's lock and socket files
+cleanup_vnc() {
+    echo "Cleaning up existing VNC processes..."
+    pkill -f "Xvfb $VNC_DISPLAY" || true
+    pkill -f "x11vnc.*$VNC_DISPLAY" || true
+    pkill -f "websockify.*$NOVNC_PORT" || true
+    # Remove stale lock/socket files; "|| true" keeps "set -e" from aborting if a removal fails
+    rm -f /tmp/.X${DISPLAY_NUM}-lock 2>/dev/null || true
+    rm -f /tmp/.X11-unix/X${DISPLAY_NUM} 2>/dev/null || true
+    sleep 1
+}
+
+# If Xvfb, x11vnc and websockify are all running already, refresh the status file and skip startup
+if is_running "Xvfb $VNC_DISPLAY" && is_running "x11vnc.*$VNC_DISPLAY" && is_running "websockify.*$NOVNC_PORT"; then
+    echo "✓ VNC server already running"
+    echo "  - Xvfb on display $VNC_DISPLAY"
+    echo "  - x11vnc on port $VNC_PORT"
+    echo "  - noVNC web client on port $NOVNC_PORT"
+    echo "========================================"
+
+    # Nothing needs to be started; there is no monitoring loop in this script
+    # Record the PID of the first process matching each pattern
+    XVFB_PID=$(pgrep -f "Xvfb $VNC_DISPLAY" | head -1)
+    X11VNC_PID=$(pgrep -f "x11vnc.*$VNC_DISPLAY" | head -1)
+    WEBSOCKIFY_PID=$(pgrep -f "websockify.*$NOVNC_PORT" | head -1)
+
+    # Rewrite the status file with the current settings and PIDs
+    mkdir -p /tmp/vnc
+    echo "DISPLAY=$VNC_DISPLAY" > /tmp/vnc/status
+    echo "VNC_PORT=$VNC_PORT" >> /tmp/vnc/status
+    echo "NOVNC_PORT=$NOVNC_PORT" >> /tmp/vnc/status
+    echo "XVFB_PID=$XVFB_PID" >> /tmp/vnc/status
+    echo "X11VNC_PID=$X11VNC_PID" >> /tmp/vnc/status
+    echo "WEBSOCKIFY_PID=$WEBSOCKIFY_PID" >> /tmp/vnc/status
+    echo "READY=true" >> /tmp/vnc/status
+
+    # Bash has no goto, so this flag makes the startup section below
+    # get skipped; the script then falls through to its final "exit 0"
+    SKIP_STARTUP=true
+else
+    SKIP_STARTUP=false
+fi
+
+# Only run startup if not skipping
+if [ "$SKIP_STARTUP" = "false" ]; then
+
+# Clean up any partial VNC processes
+cleanup_vnc
+
+# Create VNC password file
+mkdir -p /root/.vnc
+echo "Setting VNC password..."
+x11vnc -storepasswd "$VNC_PASSWORD" /root/.vnc/passwd 2>/dev/null || {
+    echo "⚠️  Failed to set VNC password, trying alternative method..."
+    # Alternative method using printf and stdin
+    printf "%s\n%s\n" "$VNC_PASSWORD" "$VNC_PASSWORD" | x11vnc -storepasswd /root/.vnc/passwd 2>/dev/null || {
+        echo "⚠️  Password setup failed, VNC may not be accessible"
+    }
+}
+
+# Start Xvfb (X virtual framebuffer)
+echo "Starting Xvfb on display $VNC_DISPLAY..."
+Xvfb $VNC_DISPLAY -screen 0 $VNC_RESOLUTION -ac +extension GLX +render -noreset > /tmp/xvfb.log 2>&1 &
+XVFB_PID=$!
+
+# Wait for Xvfb to be ready
+sleep 2
+
+if ! is_running "Xvfb $VNC_DISPLAY"; then
+    echo "❌ Failed to start Xvfb"
+    cat /tmp/xvfb.log
+    exit 1
+fi
+
+echo "✓ Xvfb started successfully (PID: $XVFB_PID)"
+
+# Start x11vnc (VNC server)
+echo "Starting x11vnc on port $VNC_PORT..."
+x11vnc \
+    -display $VNC_DISPLAY \
+    -rfbport $VNC_PORT \
+    -rfbauth /root/.vnc/passwd \
+    -forever \
+    -shared \
+    -noxdamage \
+    -ncache 10 \
+    -ncache_cr \
+    -localhost \
+    -quiet \
+    > /tmp/x11vnc.log 2>&1 &
+X11VNC_PID=$!
+
+# Wait for x11vnc to be ready
+sleep 2
+
+if ! is_running "x11vnc.*$VNC_DISPLAY"; then
+    echo "❌ Failed to start x11vnc"
+    cat /tmp/x11vnc.log
+    exit 1
+fi
+
+echo "✓ x11vnc started successfully (PID: $X11VNC_PID)"
+
+# Find noVNC installation
+NOVNC_PATH=""
+if [ -d "/opt/novnc" ]; then
+    NOVNC_PATH="/opt/novnc"
+elif [ -d "/usr/share/novnc" ]; then
+    NOVNC_PATH="/usr/share/novnc"
+elif [ -d "/usr/share/noVNC" ]; then
+    NOVNC_PATH="/usr/share/noVNC"
+fi
+
+if [ -z "$NOVNC_PATH" ]; then
+    echo "⚠️  noVNC not found, VNC server running but no web access"
+    echo "   You can still connect with a VNC client on port $VNC_PORT"
+    echo "========================================"
+    exit 0
+fi
+
+# Start websockify for noVNC
+echo "Starting noVNC web client on port $NOVNC_PORT..."
+websockify \
+    --web=$NOVNC_PATH \
+    $NOVNC_PORT \
+    localhost:$VNC_PORT \
+    > /tmp/websockify.log 2>&1 &
+WEBSOCKIFY_PID=$!
+
+# Wait for websockify to be ready
+sleep 2
+
+if ! is_running "websockify.*$NOVNC_PORT"; then  # report the outcome only; a websockify failure is not fatal
+    echo "⚠️  Failed to start websockify/noVNC"
+    cat /tmp/websockify.log
+    echo "   VNC server is running, but web access unavailable"
+else
+    echo "✓ noVNC started successfully (PID: $WEBSOCKIFY_PID)"
+    echo ""
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    echo "🎉 VNC Server Ready!"
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+    echo ""
+    echo "  Web Access: http://localhost:${NOVNC_EXTERNAL_PORT:-$NOVNC_PORT}/vnc.html"  # host-side port when the mapping is known
+    echo "  VNC Client: localhost:$DISPLAY_NUM (port $VNC_PORT)"
+    echo "  Password: (set via VNC_PASSWORD)"  # never echo the secret: this output is captured in logs
+    echo ""
+    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+fi
+
+echo "========================================"
+
+# Create status file for other scripts to check (only if we started VNC)
+if [ "$SKIP_STARTUP" = "false" ]; then
+    mkdir -p /tmp/vnc
+    echo "DISPLAY=$VNC_DISPLAY" > /tmp/vnc/status
+    echo "VNC_PORT=$VNC_PORT" >> /tmp/vnc/status
+    echo "NOVNC_PORT=$NOVNC_PORT" >> /tmp/vnc/status
+    echo "XVFB_PID=$XVFB_PID" >> /tmp/vnc/status
+    echo "X11VNC_PID=$X11VNC_PID" >> /tmp/vnc/status
+    echo "WEBSOCKIFY_PID=$WEBSOCKIFY_PID" >> /tmp/vnc/status
+    echo "READY=true" >> /tmp/vnc/status
+fi
+
+# Close the startup section
+fi
+
+# VNC is now running in the background
+# Exit the script so initialize.sh can continue
+exit 0
diff --git a/docker/run/fs/ins/install_vnc.sh b/docker/run/fs/ins/install_vnc.sh
new file mode 100755
index 0000000000..fdf854f41a
--- /dev/null
+++ b/docker/run/fs/ins/install_vnc.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Install VNC server and noVNC for remote browser control
+# This allows users to manually interact with the browser when the agent pauses
+
+set -e
+
+echo "Installing VNC server and noVNC..."
+
+# Update package list
+apt-get update
+
+# Install Xvfb (X virtual framebuffer) for headless display
+# Install x11vnc for VNC server
+# Install websockify for WebSocket support (required by noVNC)
+# Install novnc for web-based VNC client
+DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    xvfb \
+    x11vnc \
+    websockify \
+    novnc \
+    net-tools \
+    procps
+
+# Create VNC directory for password and configuration
+mkdir -p /root/.vnc
+
+# Set default VNC password (will be overridden by environment variable)
+# Using x11vnc password format - pass password as argument
+x11vnc -storepasswd "agent-zero" /root/.vnc/passwd 2>/dev/null || true
+
+# Create symlink for noVNC to easily find it
+# noVNC is typically installed in /usr/share/novnc
+if [ -d "/usr/share/novnc" ]; then
+    ln -sf /usr/share/novnc /opt/novnc
+elif [ -d "/usr/share/noVNC" ]; then
+    ln -sf /usr/share/noVNC /opt/novnc
+fi
+
+# Clean up
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+echo "✓ VNC server and noVNC installed"
+echo "  - Xvfb for virtual display"
+echo "  - x11vnc for VNC server"
+echo "  - noVNC for web-based access"
+echo "  - Default VNC password: agent-zero (change via VNC_PASSWORD env var)"
diff --git a/docker/run/fs/ins/install_x11_support.sh b/docker/run/fs/ins/install_x11_support.sh
new file mode 100644
index 0000000000..6bbc4b5445
--- /dev/null
+++ b/docker/run/fs/ins/install_x11_support.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Install X11 and GUI support for browser display
+# This allows Chromium to display on the host machine via X11 forwarding
+
+set -e
+
+echo "Installing X11 and GUI support for browser display..."
+
+# Update package list
+apt-get update
+
+# Install X11 libraries and dependencies for GUI applications
+DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    libx11-6 \
+    libx11-xcb1 \
+    libxcb1 \
+    libxcomposite1 \
+    libxcursor1 \
+    libxdamage1 \
+    libxext6 \
+    libxfixes3 \
+    libxi6 \
+    libxrandr2 \
+    libxrender1 \
+    libxss1 \
+    libxtst6 \
+    libxcb-dri3-0 \
+    libxcb-shm0 \
+    libxshmfence1 \
+    libgbm1 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libgtk-3-0 \
+    libnspr4 \
+    libnss3 \
+    libpango-1.0-0 \
+    libpangocairo-1.0-0 \
+    libglib2.0-0 \
+    libdbus-1-3 \
+    fonts-liberation \
+    xdg-utils
+
+# Install additional fonts for better browser rendering
+DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    fonts-noto \
+    fonts-noto-cjk \
+    fonts-noto-color-emoji
+
+# Clean up
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+echo "✓ X11 and GUI support installed"
diff --git a/initialize.py b/initialize.py
index 3c42c952e5..8f77e40c13 100644
--- a/initialize.py
+++ b/initialize.py
@@ -60,21 +60,11 @@ def _normalize_model_kwargs(kwargs: dict) -> dict:
         limit_requests=current_settings["embed_model_rl_requests"],
         kwargs=_normalize_model_kwargs(current_settings["embed_model_kwargs"]),
     )
-    # browser model from user settings
-    browser_llm = models.ModelConfig(
-        type=models.ModelType.CHAT,
-        provider=current_settings["browser_model_provider"],
-        name=current_settings["browser_model_name"],
-        api_base=current_settings["browser_model_api_base"],
-        vision=current_settings["browser_model_vision"],
-        kwargs=_normalize_model_kwargs(current_settings["browser_model_kwargs"]),
-    )
     # agent configuration
     config = AgentConfig(
         chat_model=chat_llm,
         utility_model=utility_llm,
         embeddings_model=embedding_llm,
-        browser_model=browser_llm,
         profile=current_settings["agent_profile"],
         memory_subdir=current_settings["agent_memory_subdir"],
         knowledge_subdirs=[current_settings["agent_knowledge_subdir"], "default"],
diff --git a/models.py b/models.py
index 469925e49f..d155f275fa 100644
--- a/models.py
+++ b/models.py
@@ -25,7 +25,7 @@
 from python.helpers.providers import get_provider_config
 from python.helpers.rate_limiter import RateLimiter
 from python.helpers.tokens import approximate_tokens
-from python.helpers import dirty_json, browser_use_monkeypatch
+from python.helpers import dirty_json
 
 from langchain_core.language_models.chat_models import SimpleChatModel
 from langchain_core.outputs.chat_generation import ChatGenerationChunk
@@ -43,7 +43,7 @@
 from sentence_transformers import SentenceTransformer
 
 
-# disable extra logging, must be done repeatedly, otherwise browser-use will turn it back on for some reason
+# disable extra logging
 def turn_off_logging():
     os.environ["LITELLM_LOG"] = "ERROR"  # only errors
     litellm.suppress_debug_info = True
@@ -56,9 +56,8 @@ def turn_off_logging():
 # init
 load_dotenv()
 turn_off_logging()
-browser_use_monkeypatch.apply()
 
-litellm.modify_params = True # helps fix anthropic tool calls by browser-use
+litellm.modify_params = True # helps fix anthropic tool calls
 
 class ModelType(Enum):
     CHAT = "Chat"
@@ -578,83 +577,6 @@ def __init__(self, wrapper, *args, **kwargs):
         self.chat = AsyncAIChatReplacement._Chat(wrapper)
 
 
-from browser_use.llm import ChatOllama, ChatOpenRouter, ChatGoogle, ChatAnthropic, ChatGroq, ChatOpenAI
-
-class BrowserCompatibleChatWrapper(ChatOpenRouter):
-    """
-    A wrapper for browser agent that can filter/sanitize messages
-    before sending them to the LLM.
-    """
-
-    def __init__(self, *args, **kwargs):
-        turn_off_logging()
-        # Create the underlying LiteLLM wrapper
-        self._wrapper = LiteLLMChatWrapper(*args, **kwargs)
-        # Browser-use may expect a 'model' attribute
-        self.model = self._wrapper.model_name
-        self.kwargs = self._wrapper.kwargs
-
-    @property
-    def model_name(self) -> str:
-        return self._wrapper.model_name
-
-    @property
-    def provider(self) -> str:
-        return self._wrapper.provider
-
-    def get_client(self, *args, **kwargs):  # type: ignore
-        return AsyncAIChatReplacement(self, *args, **kwargs)
-
-    async def _acall(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ):
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self._wrapper.a0_model_conf, str(messages))
-
-        # Call the model
-        try:
-            model = kwargs.pop("model", None)
-            kwrgs = {**self._wrapper.kwargs, **kwargs}
-
-            # hack from browser-use to fix json schema for gemini (additionalProperties, $defs, $ref)
-            if "response_format" in kwrgs and "json_schema" in kwrgs["response_format"] and model.startswith("gemini/"):
-                kwrgs["response_format"]["json_schema"] = ChatGoogle("")._fix_gemini_schema(kwrgs["response_format"]["json_schema"])
-
-            resp = await acompletion(
-                model=self._wrapper.model_name,
-                messages=messages,
-                stop=stop,
-                **kwrgs,
-            )
-
-            # Gemini: strip triple backticks and conform schema
-            try:
-                msg = resp.choices[0].message # type: ignore
-                if self.provider == "gemini" and isinstance(getattr(msg, "content", None), str):
-                    cleaned = browser_use_monkeypatch.gemini_clean_and_conform(msg.content) # type: ignore
-                    if cleaned:
-                        msg.content = cleaned
-            except Exception:
-                pass
-
-        except Exception as e:
-            raise e
-
-        # another hack for browser-use post process invalid jsons
-        try:
-            if "response_format" in kwrgs and "json_schema" in kwrgs["response_format"] or "json_object" in kwrgs["response_format"]:
-                if resp.choices[0].message.content is not None and not resp.choices[0].message.content.startswith("{"): # type: ignore
-                    js = dirty_json.parse(resp.choices[0].message.content) # type: ignore
-                    resp.choices[0].message.content = dirty_json.stringify(js) # type: ignore
-        except Exception as e:
-            pass
-
-        return resp
-
 class LiteLLMEmbeddingWrapper(Embeddings):
     model_name: str
     kwargs: dict = {}
@@ -899,16 +821,6 @@ def get_chat_model(
     )
 
 
-def get_browser_model(
-    provider: str, name: str, model_config: Optional[ModelConfig] = None, **kwargs: Any
-) -> BrowserCompatibleChatWrapper:
-    orig = provider.lower()
-    provider_name, kwargs = _merge_provider_defaults("chat", orig, kwargs)
-    return _get_litellm_chat(
-        BrowserCompatibleChatWrapper, name, provider_name, model_config, **kwargs
-    )
-
-
 def get_embedding_model(
     provider: str, name: str, model_config: Optional[ModelConfig] = None, **kwargs: Any
 ) -> LiteLLMEmbeddingWrapper | LocalSentenceTransformerWrapper:
diff --git a/prompts/agent.system.tool.browser.md b/prompts/agent.system.tool.browser.md
index 120316e155..f0a00c46ea 100644
--- a/prompts/agent.system.tool.browser.md
+++ b/prompts/agent.system.tool.browser.md
@@ -1,36 +1,186 @@
-### browser_agent:
-
-subordinate agent controls playwright browser
-message argument talks to agent give clear instructions credentials task based
-reset argument spawns new agent
-do not reset if iterating
-be precise descriptive like: open google login and end task, log in using ... and end task
-when following up start: considering open pages
-dont use phrase wait for instructions use end task
-downloads default in /a0/tmp/downloads
-pass secrets and variables in message when needed
-
-usage:
-```json
-{
-  "thoughts": ["I need to log in to..."],
-  "headline": "Opening new browser session for login",
-  "tool_name": "browser_agent",
-  "tool_args": {
-    "message": "Open and log me into...",
-    "reset": "true"
-  }
-}
-```
-
-```json
-{
-  "thoughts": ["I need to log in to..."],
-  "headline": "Continuing with existing browser session",
-  "tool_name": "browser_agent",
-  "tool_args": {
-    "message": "Considering open pages, click...",
-    "reset": "false"
-  }
-}
-```
+### browser_control
+
+granular browser control with individual actions
+use for precise, step-by-step web automation tasks
+available methods: navigate, click, type, scroll, observe_page, select, press, hover, pause_for_user, get_browser_info
+screenshots captured automatically after each action for visual feedback
+
+**navigate** - go to URL
+**click** - click element by CSS selector or text
+**type** - type text into input field
+**scroll** - scroll page (direction: up/down/left/right)
+**observe_page** - get current page state, title, content, elements (adds screenshot to context)
+**select** - select option from dropdown
+**press** - press keyboard key on element
+**hover** - hover over element
+**pause_for_user** - pause execution for manual user interaction (CAPTCHAs, manual login, etc.)
+  - requires browser to be in visible mode (headless=False)
+  - waits specified seconds for user to interact with browser
+  - use when encountering CAPTCHAs, blocked automation, or manual verification needed
+**get_browser_info** - diagnostic tool to check browser visibility mode and troubleshoot
+  - shows current headless/visible mode
+  - displays configuration settings
+  - provides troubleshooting tips if browser not visible
+  - use when you can't see the browser window or need to verify settings
+
+session management:
+- browser state persists across calls
+- use reset arg to start fresh session
+- same page context maintained between actions
+- screenshots available in chat history
+
+usage examples:
+
+1. Navigate and observe
+~~~json
+{
+    "thoughts": ["Need to open the website and see what's there"],
+    "headline": "Opening website",
+    "tool_name": "browser_control:navigate",
+    "tool_args": {
+        "url": "https://example.com"
+    }
+}
+~~~
+
+2. Observe current page
+~~~json
+{
+    "thoughts": ["Let me see what's on this page"],
+    "headline": "Observing page content",
+    "tool_name": "browser_control:observe_page",
+    "tool_args": {}
+}
+~~~
+
+3. Click element
+~~~json
+{
+    "thoughts": ["Need to click the login button"],
+    "headline": "Clicking login button",
+    "tool_name": "browser_control:click",
+    "tool_args": {
+        "selector": "button[type='submit']"
+    }
+}
+~~~
+
+4. Type into field
+~~~json
+{
+    "thoughts": ["Entering username"],
+    "headline": "Typing username",
+    "tool_name": "browser_control:type",
+    "tool_args": {
+        "selector": "input[name='username']",
+        "text": "myusername"
+    }
+}
+~~~
+
+5. Scroll page
+~~~json
+{
+    "thoughts": ["Need to see more content"],
+    "headline": "Scrolling down",
+    "tool_name": "browser_control:scroll",
+    "tool_args": {
+        "direction": "down"
+    }
+}
+~~~
+
+6. Select dropdown option
+~~~json
+{
+    "thoughts": ["Need to select country from dropdown"],
+    "headline": "Selecting country",
+    "tool_name": "browser_control:select",
+    "tool_args": {
+        "selector": "select[name='country']",
+        "value": "USA"
+    }
+}
+~~~
+
+7. Press key
+~~~json
+{
+    "thoughts": ["Need to submit form with Enter key"],
+    "headline": "Pressing Enter",
+    "tool_name": "browser_control:press",
+    "tool_args": {
+        "selector": "input[name='search']",
+        "key": "Enter"
+    }
+}
+~~~
+
+8. Hover over element
+~~~json
+{
+    "thoughts": ["Need to hover over menu to reveal submenu"],
+    "headline": "Hovering over menu",
+    "tool_name": "browser_control:hover",
+    "tool_args": {
+        "selector": "#main-menu"
+    }
+}
+~~~
+
+9. Pause for user interaction
+~~~json
+{
+    "thoughts": ["Encountered a CAPTCHA that needs manual solving"],
+    "headline": "Pausing for CAPTCHA",
+    "tool_name": "browser_control:pause_for_user",
+    "tool_args": {
+        "wait_seconds": 120,
+        "message": "Please solve the CAPTCHA"
+    }
+}
+~~~
+
+10. Check browser visibility and settings
+~~~json
+{
+    "thoughts": ["User says they can't see the browser window, let me check the configuration"],
+    "headline": "Checking browser settings",
+    "tool_name": "browser_control:get_browser_info",
+    "tool_args": {}
+}
+~~~
+
+11. Reset session
+~~~json
+{
+    "thoughts": ["Browser session seems stuck, starting fresh"],
+    "headline": "Resetting browser session",
+    "tool_name": "browser_control:navigate",
+    "tool_args": {
+        "url": "https://example.com",
+        "reset": "true"
+    }
+}
+~~~
+
+**configuration:**
+- to enable visible browser for manual interaction: set `browser_control_headless: False` in agent config
+- default is visible mode (`browser_control_headless: False`); set it to `True` to run the browser invisibly in the background
+- visible mode required for pause_for_user to work
+- start URL can be configured with `browser_control_start_url`
+- timeout can be configured with `browser_control_timeout` (milliseconds)
+
+**best practices:**
+- always observe_page first to understand current state
+- use specific CSS selectors when possible (id, class, name attribute)
+- for text-based clicking, selector will be treated as text content
+- handle failures gracefully - try alternative selectors if needed
+- reset session if browser gets stuck or navigation fails repeatedly
+- each action is atomic - chain multiple actions for complex workflows
+- screenshots show visual state after each action
+- observe_page adds screenshot to your context for vision analysis
+- use pause_for_user when encountering CAPTCHAs or automation blocks
+- use get_browser_info when user reports browser visibility issues
+- if browser was initialized in wrong mode, use reset=true to restart it
+
diff --git a/prompts/browser_agent.system.md b/prompts/browser_control.system.md
similarity index 100%
rename from prompts/browser_agent.system.md
rename to prompts/browser_control.system.md
diff --git a/python/api/browser_control.py b/python/api/browser_control.py
new file mode 100644
index 0000000000..1899ff66b4
--- /dev/null
+++ b/python/api/browser_control.py
@@ -0,0 +1,81 @@
+from python.helpers.api import ApiHandler, Request, Response
+from flask import send_file, redirect
+import os
+
+class BrowserControl(ApiHandler):
+    """
+    API endpoint for accessing the browser control interface (noVNC).
+    This allows users to manually interact with the browser when the agent pauses.
+    """
+
+    @classmethod
+    def requires_auth(cls) -> bool:
+        # Require authentication for browser control access
+        return True
+
+    @classmethod
+    def requires_csrf(cls) -> bool:
+        # CSRF not needed for GET requests
+        return False
+
+    @classmethod
+    def get_methods(cls) -> list[str]:
+        return ["GET"]
+
+    async def process(self, input: dict, request: Request) -> dict | Response:
+        """
+        Report the VNC status read from /tmp/vnc/status, or redirect to the noVNC client.
+
+        Query parameters:
+        - action: 'info' (default) | 'redirect'
+        - info: Returns VNC connection details as a dict
+        - redirect: Responds with a 302 to the noVNC web client
+        """
+        action = request.args.get('action', 'info')
+
+        # Defaults reported when the status file is missing or unreadable
+        vnc_status_file = '/tmp/vnc/status'
+        vnc_ready = False
+        vnc_display = ':99'
+        novnc_port = '6080'
+
+        # Get external port mapping from environment variable (for Docker port mapping)
+        # Default to 56080 which is the standard external mapping for noVNC port 6080
+        external_novnc_port = os.environ.get('NOVNC_EXTERNAL_PORT', '56080')
+
+        if os.path.exists(vnc_status_file):
+            try:
+                with open(vnc_status_file, 'r') as f:
+                    status_lines = f.readlines()
+                    status_dict = {}
+                    for line in status_lines:
+                        if '=' in line:
+                            key, value = line.strip().split('=', 1)
+                            status_dict[key] = value
+
+                    vnc_ready = status_dict.get('READY', 'false') == 'true'
+                    vnc_display = status_dict.get('DISPLAY', ':99')
+                    novnc_port = status_dict.get('NOVNC_PORT', '6080')
+            except (OSError, ValueError):
+                pass  # best effort: an unreadable status file keeps the "not ready" defaults
+
+        # noVNC client URL on the externally mapped port, built once. Parameters:
+        # - autoconnect: Connect automatically on load
+        # - resize=none: Do not scale the remote session to the viewport
+        # - reconnect: Automatically reconnect if connection is lost
+        # - reconnect_delay: Wait 1 second before reconnecting
+        # - show_dot: Show connection status indicator
+        novnc_url = f"http://localhost:{external_novnc_port}/vnc.html?autoconnect=true&resize=none&reconnect=true&reconnect_delay=1000&show_dot=true"
+
+        if action == 'redirect':
+            return redirect(novnc_url, code=302)
+
+        # Default: return connection info
+        return {
+            "vnc_ready": vnc_ready,
+            "vnc_display": vnc_display,
+            "novnc_port": novnc_port,
+            "external_novnc_port": external_novnc_port,
+            "novnc_url": novnc_url,
+            "instructions": "Click the noVNC URL to access the browser control interface" if vnc_ready else "VNC server is not running"
+        }
diff --git a/python/helpers/browser_control_client.py b/python/helpers/browser_control_client.py
new file mode 100644
index 0000000000..7a891fada9
--- /dev/null
+++ b/python/helpers/browser_control_client.py
@@ -0,0 +1,602 @@
+"""
+Browser Control Client - Playwright interface for browser automation.
+
+This module provides the PlaywrightClient for browser automation.
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, List, Optional
+import base64
+
+
+class ActionType(str, Enum):
+    """Action kinds an ``Action`` can carry; the ``str`` mixin makes members equal to their string values."""
+
+    CLICK = "click"
+    TYPE = "type"
+    SELECT = "select"
+    NAVIGATE = "navigate"
+    SCREENSHOT = "screenshot"  # no branch in PlaywrightClient.execute_action; reported there as unsupported
+    SCROLL = "scroll"
+    PRESS = "press"
+    HOVER = "hover"
+    PAUSE_FOR_USER = "pause_for_user"
+
+
+@dataclass
+class Action:
+    """One step for the browser client: what to do, on which element, with which value."""
+
+    action_type: ActionType
+    selector: Optional[str] = None
+    value: Optional[str] = None
+    coordinates: Optional[Dict[str, int]] = None
+    metadata: Dict[str, Any] = None
+
+    def __post_init__(self):
+        # A None default stands in for "no metadata"; give each instance its own dict
+        self.metadata = {} if self.metadata is None else self.metadata
+
+
+@dataclass
+class ActionResult:
+    """Outcome of one executed action: success flag, description and optional error/screenshot."""
+
+    success: bool
+    description: str
+    error: Optional[str] = None
+    screenshot: Optional[bytes] = None
+    task_complete: bool = False
+    metadata: Dict[str, Any] = None
+
+    def __post_init__(self):
+        # A None default stands in for "no metadata"; give each instance its own dict
+        self.metadata = {} if self.metadata is None else self.metadata
+
+
+@dataclass
+class InterfaceState:
+    """Snapshot of a page: location, title, content, interactive elements and screenshot."""
+
+    url: Optional[str] = None
+    title: Optional[str] = None
+    content: str = ""
+    interactive_elements: List[Dict[str, Any]] = None
+    screenshot: Optional[bytes] = None
+    metadata: Dict[str, Any] = None
+
+    def __post_init__(self):
+        # None defaults stand in for "empty"; give each instance its own containers
+        for name, factory in (("interactive_elements", list), ("metadata", dict)):
+            if getattr(self, name) is None:
+                setattr(self, name, factory())
+
+
+@dataclass
+class BrowserControlState:
+    """Container for the Playwright handles and the PlaywrightClient used by the browser control tool."""
+    
+    playwright: Optional[Any] = None
+    browser: Optional[Any] = None
+    context: Optional[Any] = None
+    page: Optional[Any] = None
+    client: Optional['PlaywrightClient'] = None
+    initialized: bool = False
+    
+    def __del__(self):
+        """Best-effort close of the client when this state object is destroyed; never raises."""
+        if self.initialized and self.client:
+            try:
+                import asyncio
+                # Running loop: schedule close() as a task; otherwise run it to completion
+                try:
+                    loop = asyncio.get_event_loop()
+                    if loop.is_running():
+                        loop.create_task(self.client.close())
+                    else:
+                        asyncio.run(self.client.close())
+                except RuntimeError:
+                    pass
+            except Exception:
+                # Silently fail - destructor shouldn't raise
+                pass
+
+
+class PlaywrightClient:
+    """
+    Web interface automation using Playwright.
+    
+    Provides browser automation capabilities for web applications.
+    """
+    
+    def __init__(
+        self,
+        start_url: str = "https://www.google.com",
+        headless: bool = True,
+        playwright_binary: Optional[str] = None,
+        cdp_url: Optional[str] = None,
+        use_vnc: bool = False,
+        vnc_display: Optional[str] = None
+    ):
+        """
+        Store the browser settings; nothing is launched until initialize() is awaited.
+
+        Args:
+            start_url: Page opened right after the browser session is created
+            headless: Launch the embedded browser without a visible window
+            playwright_binary: Optional executable path passed to the Chromium launcher
+            cdp_url: Optional Chrome DevTools Protocol endpoint of an already running
+                    browser; when set, initialize() connects to it instead of launching
+            use_vnc: Point DISPLAY at the VNC display before starting Playwright
+            vnc_display: VNC display (e.g., ":99"); falls back to VNC_DISPLAY, then ":99"
+        """
+
+        # Caller-supplied configuration, consumed by initialize()
+        self.start_url, self.headless = start_url, headless
+        self.playwright_binary, self.cdp_url = playwright_binary, cdp_url
+        self.use_vnc, self.vnc_display = use_vnc, vnc_display
+
+        # Live Playwright handles; stay None until initialize() has run
+        self.playwright = self.browser = self.context = self.page = None
+
+        # Actions recorded by execute_action(), oldest first
+        self.action_history = []
+    
+    async def initialize(self) -> None:
+        """Start Playwright and open start_url: attach over CDP when cdp_url is set, else launch Chromium."""
+        try:
+            from playwright.async_api import async_playwright
+        except ImportError:
+            raise ImportError(
+                "Playwright is not installed. Install with: pip install playwright"
+            )
+
+        # Configure VNC display if enabled
+        if self.use_vnc:
+            import os
+            # Get VNC display from instance variable or environment
+            vnc_display = self.vnc_display or os.environ.get('VNC_DISPLAY', ':99')
+            # Set DISPLAY environment variable for Playwright to use VNC
+            original_display = os.environ.get('DISPLAY')
+            os.environ['DISPLAY'] = vnc_display
+            print(f"Using VNC display: {vnc_display}")
+            # Remember the previous DISPLAY (nothing in this module restores it)
+            self._original_display = original_display
+
+        self.playwright = await async_playwright().start()
+
+        # Connect via CDP if URL provided (native browser mode)
+        if self.cdp_url:
+            print(f"Connecting to browser via CDP: {self.cdp_url}")
+
+            # Handle host.docker.internal Host header issue
+            # Convert HTTP endpoint to WebSocket to bypass Chrome's Host header validation
+            endpoint_url = self.cdp_url
+            if endpoint_url.startswith("http://") and "host.docker.internal" in endpoint_url:
+                import re
+                port_match = re.search(r':(\d+)', endpoint_url)
+                if port_match:
+                    port = port_match.group(1)
+                    # Use WebSocket format - less strict Host header checking
+                    endpoint_url = f"ws://host.docker.internal:{port}"
+                    print(f"  → Converted to WebSocket: {endpoint_url}")
+
+            self.browser = await self.playwright.chromium.connect_over_cdp(
+                endpoint_url=endpoint_url,
+                timeout=30000  # 30 seconds
+            )
+            # Reuse the browser's first existing context; create one only if there is none
+            self.context = self.browser.contexts[0] if self.browser.contexts else await self.browser.new_context(
+                viewport={"width": 800, "height": 1600}
+            )
+            # Use existing page or create new one
+            self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()
+            # Navigate to start URL
+            await self.page.goto(self.start_url)
+        else:
+            # Launch browser with optional binary path (embedded browser mode)
+            launch_options = {
+                "headless": self.headless,
+                "args": ["--headless=new"] if self.headless else []
+            }
+            if self.playwright_binary:
+                launch_options["executable_path"] = self.playwright_binary
+
+            self.browser = await self.playwright.chromium.launch(**launch_options)
+
+            # Create context with viewport size matching browser_agent
+            self.context = await self.browser.new_context(
+                viewport={"width": 800, "height": 1600}
+            )
+            self.page = await self.context.new_page()
+            await self.page.goto(self.start_url)
+    
+    async def get_state(self, format: str = "hybrid") -> InterfaceState:
+        """Snapshot the page: "text" = markup + elements, "visual" = screenshot, "hybrid" = both."""
+        if not self.page:
+            raise RuntimeError("Browser not initialized. Call initialize() first.")
+
+        wants_text = format in ("text", "hybrid")
+        wants_visual = format in ("visual", "hybrid")
+
+        page_url = self.page.url
+        snapshot = InterfaceState(url=page_url, title=await self.page.title())
+
+        if wants_text:
+            # page.content() yields the page's HTML markup
+            snapshot.content = await self.page.content()
+            snapshot.interactive_elements = await self._get_interactive_elements()
+
+        if wants_visual:
+            snapshot.screenshot = await self.get_screenshot()
+
+        return snapshot
+    
+    async def _get_interactive_elements(self) -> List[Dict[str, Any]]:
+        """List visible buttons, links, form fields and click targets; [] when unavailable."""
+        if not self.page:
+            return []
+
+        try:
+            # The script collects tag/text/type/placeholder/href/selector per visible match
+            return await self.page.evaluate(
+                """
+                () => {
+                    const interactiveSelectors = [
+                        'button', 'a', 'input', 'select', 'textarea',
+                        '[role="button"]', '[role="link"]', '[onclick]'
+                    ];
+                    
+                    const elements = [];
+                    interactiveSelectors.forEach(selector => {
+                        document.querySelectorAll(selector).forEach(el => {
+                            if (el.offsetParent !== null) {  // Is visible
+                                elements.push({
+                                    tag: el.tagName.toLowerCase(),
+                                    text: el.innerText || el.value || '',
+                                    type: el.type || '',
+                                    placeholder: el.placeholder || '',
+                                    href: el.href || '',
+                                    selector: el.id ? `#${el.id}` :
+                                             el.className ? `.${el.className.split(' ')[0]}` :
+                                             el.tagName.toLowerCase()
+                                });
+                            }
+                        });
+                    });
+                    return elements;
+                }
+                """
+            )
+        except Exception:
+            return []
+    
+    async def execute_action(self, action: Action) -> ActionResult:
+        """Run one Action on the page; errors during the action come back as ActionResult(success=False, error=...)."""
+        if not self.page:
+            raise RuntimeError("Browser not initialized. Call initialize() first.")
+
+        try:
+            if action.action_type == ActionType.NAVIGATE:
+                if not action.value:
+                    raise ValueError("Navigate action requires a URL value")
+                # Remember where we were so a failed goto is not mistaken for a page load
+                previous_url = self.page.url
+                try:
+                    # First attempt: wait for networkidle (ideal but may timeout on slow sites)
+                    await self.page.goto(
+                        action.value, wait_until="networkidle", timeout=5000
+                    )
+                    result = ActionResult(
+                        success=True, description=f"Navigated to {action.value}"
+                    )
+                except Exception:
+                    # Fallback: goto failed or timed out; accept it only if the page reached the
+                    # target or moved off the previous page (a browser error page does not count)
+                    current_url = self.page.url
+                    moved = current_url not in ("", "about:blank", previous_url) and not current_url.startswith("chrome-error://")
+                    if current_url and (action.value in current_url or moved):
+                        # Page loaded even if not fully idle - consider it a success
+                        try:
+                            # Wait a bit for DOM to be ready
+                            await self.page.wait_for_load_state(
+                                "domcontentloaded", timeout=5000
+                            )
+                        except Exception:
+                            pass
+                        result = ActionResult(
+                            success=True,
+                            description=f"Navigated to {action.value} (page loaded but not fully idle)",
+                        )
+                    else:
+                        # Navigation truly failed - surface the original error
+                        raise
+
+            elif action.action_type == ActionType.CLICK:
+                if not action.selector:
+                    raise ValueError("Click action requires a selector")
+
+                # Try different selector strategies with detailed error tracking
+                clicked = False
+                selector = action.selector
+                attempted_selectors = []
+                last_error = None
+
+                # Strategy 1: Direct CSS selector (wait for visibility first)
+                try:
+                    # Wait for element to be visible before clicking
+                    await self.page.wait_for_selector(selector, state="visible", timeout=3000)
+                    await self.page.click(selector, timeout=2000)
+                    clicked = True
+                except Exception as e:
+                    attempted_selectors.append(f"CSS:{selector}")
+                    last_error = str(e)
+
+                    # Strategy 2: If selector contains :contains(), extract and try text-based
+                    if ":contains(" in selector and not clicked:
+                        import re
+                        match = re.search(r":contains\(['\"]?(.*?)['\"]?\)", selector)
+                        if match:
+                            text = match.group(1)
+
+                            # Try exact text match
+                            try:
+                                await self.page.wait_for_selector(f"text={text}", state="visible", timeout=2000)
+                                await self.page.click(f"text={text}", timeout=2000)
+                                clicked = True
+                                selector = f"text={text}"
+                            except Exception as e2:
+                                attempted_selectors.append(f"text={text}")
+                                last_error = str(e2)
+
+                                # Try partial text match
+                                try:
+                                    await self.page.click(f"text=/.*{text}.*/i", timeout=2000)
+                                    clicked = True
+                                    selector = f"text=/.*{text}.*/i"
+                                except Exception as e3:
+                                    attempted_selectors.append(f"text=/.*{text}.*/i")
+                                    last_error = str(e3)
+
+                                    # Try href match for links
+                                    try:
+                                        link_selector = f"a[href*='{text.lower()}']"
+                                        await self.page.click(link_selector, timeout=2000)
+                                        clicked = True
+                                        selector = link_selector
+                                    except Exception as e4:
+                                        attempted_selectors.append(link_selector)
+                                        last_error = str(e4)
+
+                    # Strategy 3: If plain text (not CSS), try as text selector
+                    if (
+                        not clicked
+                        and not selector.startswith("#")
+                        and not selector.startswith(".")
+                        and not selector.startswith("[")
+                    ):
+                        # Try exact text
+                        try:
+                            await self.page.click(f"text={selector}", timeout=2000)
+                            clicked = True
+                            selector = f"text={selector}"
+                        except Exception as e5:
+                            attempted_selectors.append(f"text={selector}")
+                            last_error = str(e5)
+
+                            # Try partial text match (case-insensitive)
+                            try:
+                                await self.page.click(f"text=/.*{selector}.*/i", timeout=2000)
+                                clicked = True
+                                selector = f"text=/.*{selector}.*/i"
+                            except Exception as e6:
+                                attempted_selectors.append(f"text=/.*{selector}.*/i")
+                                last_error = str(e6)
+
+                    # Strategy 4: Force click if element is covered (e.g., by ads)
+                    if not clicked:
+                        try:
+                            original_selector = action.selector
+                            # Try to locate the element and force click
+                            await self.page.click(original_selector, force=True, timeout=2000)
+                            clicked = True
+                            selector = f"{original_selector} (forced)"
+                        except Exception as e7:
+                            attempted_selectors.append(f"force:{original_selector}")
+                            last_error = str(e7)
+
+                    if not clicked:
+                        # Provide helpful error message with all attempted strategies
+                        error_msg = f"Failed to click element. Attempted selectors: {', '.join(attempted_selectors)}. Last error: {last_error}"
+                        raise Exception(error_msg)
+
+                result = ActionResult(
+                    success=True, description=f"Clicked on {selector}"
+                )
+
+            elif action.action_type == ActionType.TYPE:
+                if not action.selector or not action.value:
+                    raise ValueError("Type action requires both selector and value")
+                # Wait for input to be visible before typing
+                await self.page.wait_for_selector(action.selector, state="visible", timeout=3000)
+                await self.page.fill(action.selector, action.value)
+                result = ActionResult(
+                    success=True,
+                    description=f"Typed '{action.value}' into {action.selector}",
+                )
+
+            elif action.action_type == ActionType.SELECT:
+                if not action.selector or not action.value:
+                    raise ValueError("Select action requires both selector and value")
+                # Wait for select element to be visible
+                await self.page.wait_for_selector(action.selector, state="visible", timeout=3000)
+                await self.page.select_option(action.selector, action.value)
+                result = ActionResult(
+                    success=True,
+                    description=f"Selected '{action.value}' in {action.selector}",
+                )
+
+            elif action.action_type == ActionType.PRESS:
+                if not action.selector or not action.value:
+                    raise ValueError("Press action requires both selector and value")
+                # Wait for element to be visible before pressing key
+                await self.page.wait_for_selector(action.selector, state="visible", timeout=3000)
+                await self.page.press(action.selector, action.value)
+                result = ActionResult(
+                    success=True,
+                    description=f"Pressed '{action.value}' on {action.selector}",
+                )
+
+            elif action.action_type == ActionType.SCROLL:
+                # Map direction to scroll values
+                direction = action.value or "down"
+                scroll_x, scroll_y = 0, 0
+
+                if direction == "down":
+                    scroll_y = 500
+                elif direction == "up":
+                    scroll_y = -500
+                elif direction == "right":
+                    scroll_x = 500
+                elif direction == "left":
+                    scroll_x = -500
+                else:
+                    # If it's a number, use it directly for vertical scrolling
+                    try:
+                        scroll_y = int(direction)
+                    except ValueError:
+                        scroll_y = 500  # Default to scrolling down
+
+                await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
+                result = ActionResult(
+                    success=True,
+                    description=f"Scrolled {direction} by {abs(scroll_y or scroll_x)} pixels",
+                )
+
+            elif action.action_type == ActionType.HOVER:
+                if not action.selector:
+                    raise ValueError("Hover action requires a selector")
+                # Wait for element to be visible before hovering
+                await self.page.wait_for_selector(action.selector, state="visible", timeout=3000)
+                await self.page.hover(action.selector)
+                result = ActionResult(
+                    success=True, description=f"Hovered over {action.selector}"
+                )
+
+            elif action.action_type == ActionType.PAUSE_FOR_USER:
+                # Pause execution and wait for user interaction
+                # This is useful for CAPTCHAs, manual login, or other user interventions
+                wait_time = int(action.value) if action.value else 60
+                message = action.metadata.get("message", "Pausing for user interaction...")
+
+                # Check if VNC is available for user interaction
+                vnc_url = self.get_vnc_url(host="localhost", port=56080)
+
+                if not vnc_url and self.headless:
+                    # No VNC and headless - user has no way to interact
+                    result = ActionResult(
+                        success=False,
+                        description="",
+                        error="Cannot pause for user: browser is in headless mode and VNC is not available. Set headless=False or enable VNC when initializing the browser."
+                    )
+                else:
+                    # VNC is available or browser is visible - user can interact
+                    # Return immediately without blocking - let the agent handle the pause
+                    print(f"\n{'='*60}")
+                    print(f"BROWSER READY FOR USER INTERACTION: {message}")
+                    print(f"Current URL: {self.page.url}")
+                    if vnc_url:
+                        print(f"VNC URL: {vnc_url}")
+                        print(f"Browser control panel will open automatically in web UI")
+                    else:
+                        print(f"Browser window should be visible on your display")
+                    print(f"Agent will wait up to {wait_time} seconds")
+                    print(f"{'='*60}\n")
+
+                    result = ActionResult(
+                        success=True,
+                        description=f"Browser ready for user interaction. Agent will pause for up to {wait_time} seconds. Current page: {self.page.url}"
+                    )
+
+            else:
+                result = ActionResult(
+                    success=False,
+                    description="",
+                    error=f"Unsupported action type: {action.action_type}",
+                )
+
+            # Record action in history
+            self.action_history.append(action)
+
+            # Add screenshot if requested
+            if action.metadata.get("capture_screenshot", False):
+                result.screenshot = await self.get_screenshot()
+
+            return result
+
+        except Exception as e:
+            return ActionResult(success=False, description="", error=str(e))
+    
+    async def get_screenshot(self) -> bytes:
+        """Capture the current viewport (not the full page) and return it as PNG bytes."""
+        page = self.page
+        if not page:
+            raise RuntimeError("Browser not initialized. Call initialize() first.")
+        return await page.screenshot(type="png", full_page=False)
+    
+    async def get_screenshot_base64(self) -> str:
+        """Return the current page screenshot as a base64 text string for LLM context."""
+        # PNG bytes -> base64 bytes -> str
+        return base64.b64encode(await self.get_screenshot()).decode('utf-8')
+
+    def get_vnc_url(self, host: str = "localhost", port: int = 6080) -> Optional[str]:
+        """
+        Build the noVNC URL for manual browser control.
+
+        Args:
+            host: Host name placed in the URL (default: localhost)
+            port: Port used only when the status file has no NOVNC_PORT entry
+
+        Returns:
+            The URL when VNC is enabled and the status file reports READY=true;
+            None otherwise, including when the status file is missing or unreadable
+        """
+        if not self.use_vnc:
+            return None
+
+        import os
+        # No status file means VNC is treated as unavailable
+        status_path = '/tmp/vnc/status'
+        if not os.path.exists(status_path):
+            return None
+
+        try:
+            with open(status_path, 'r') as status_file:
+                # KEY=VALUE lines; lines without "=" are skipped, later keys win
+                settings = dict(
+                    entry.strip().split('=', 1)
+                    for entry in status_file.readlines()
+                    if '=' in entry
+                )
+
+            if settings.get('READY', 'false') != 'true':
+                return None
+
+            resolved_port = settings.get('NOVNC_PORT', str(port))
+            return f"http://{host}:{resolved_port}/vnc.html?autoconnect=true&resize=none"
+        except Exception:
+            return None
+
+    async def close(self) -> None:
+        """Shut down page, context, browser and Playwright, in that order; unset handles are skipped."""
+        shutdown_order = (
+            (self.page, "close"),
+            (self.context, "close"),
+            (self.browser, "close"),
+            (self.playwright, "stop"),
+        )
+        for handle, method in (step for step in shutdown_order if step[0]):
+            await getattr(handle, method)()
+
diff --git a/python/helpers/browser_use.py b/python/helpers/browser_use.py
deleted file mode 100644
index 5c1800d2e4..0000000000
--- a/python/helpers/browser_use.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from python.helpers import dotenv
-dotenv.save_dotenv_value("ANONYMIZED_TELEMETRY", "false")
-import browser_use
-import browser_use.utils
\ No newline at end of file
diff --git a/python/helpers/browser_use_monkeypatch.py b/python/helpers/browser_use_monkeypatch.py
deleted file mode 100644
index 8f77ca9e6b..0000000000
--- a/python/helpers/browser_use_monkeypatch.py
+++ /dev/null
@@ -1,162 +0,0 @@
-from typing import Any
-from browser_use.llm import ChatGoogle
-from python.helpers import dirty_json
-
-
-# ------------------------------------------------------------------------------
-# Gemini Helper for Output Conformance
-# ------------------------------------------------------------------------------
-# This function sanitizes and conforms the JSON output from Gemini to match
-# the specific schema expectations of the browser-use library. It handles
-# markdown fences, aliases actions (like 'complete_task' to 'done'), and
-# intelligently constructs a valid 'data' object for the final action.
-
-def gemini_clean_and_conform(text: str):
-    obj = None
-    try:
-        # dirty_json parser is robust enough to handle markdown fences
-        obj = dirty_json.parse(text)
-    except Exception:
-        return None  # return None if parsing fails
-
-    if not isinstance(obj, dict):
-        return None
-
-    # Conform actions to browser-use expectations
-    if isinstance(obj.get("action"), list):
-        normalized_actions = []
-        for item in obj["action"]:
-            if not isinstance(item, dict):
-                continue  # Skip non-dict items
-
-            action_key, action_value = next(iter(item.items()), (None, None))
-            if not action_key:
-                continue
-
-            # Alias 'complete_task' to 'done' to handle inconsistencies
-            if action_key == "complete_task":
-                action_key = "done"
-
-            # Create a mutable copy of the value
-            v = (action_value or {}).copy()
-
-            if action_key in ("scroll_down", "scroll_up", "scroll"):
-                is_down = action_key != "scroll_up"
-                v.setdefault("down", is_down)
-                v.setdefault("num_pages", 1.0)
-                normalized_actions.append({"scroll": v})
-            elif action_key == "go_to_url":
-                v.setdefault("new_tab", False)
-                normalized_actions.append({action_key: v})
-            elif action_key == "done":
-                # If `data` is missing, construct it from other keys
-                if "data" not in v:
-                    # Pop fields from the top-level `done` object
-                    response_text = v.pop("response", None)
-                    summary_text = v.pop("page_summary", None)
-                    title_text = v.pop("title", "Task Completed")
-
-                    final_response = response_text or "Task completed successfully." # browser-use expects string
-                    final_summary = summary_text or "No page summary available." # browser-use expects string
-
-                    v["data"] = {
-                        "title": title_text,
-                        "response": final_response,
-                        "page_summary": final_summary,
-                    }
-
-                v.setdefault("success", True)
-                normalized_actions.append({action_key: v})
-            else:
-                normalized_actions.append(item)
-        obj["action"] = normalized_actions
-
-    return dirty_json.stringify(obj)
-
-# ------------------------------------------------------------------------------
-# Monkey-patch for browser-use Gemini schema issue
-# ------------------------------------------------------------------------------
-# The original _fix_gemini_schema in browser_use.llm.google.chat.ChatGoogle
-# removes the 'title' property but fails to remove it from the 'required' list,
-# causing a validation error with the Gemini API. This patch corrects that behavior.
-
-def _patched_fix_gemini_schema(self, schema: dict[str, Any]) -> dict[str, Any]:
-    """
-    Convert a Pydantic model to a Gemini-compatible schema.
-
-    This function removes unsupported properties like 'additionalProperties' and resolves
-    $ref references that Gemini doesn't support.
-    """
-
-    # Handle $defs and $ref resolution
-    if '$defs' in schema:
-        defs = schema.pop('$defs')
-
-        def resolve_refs(obj: Any) -> Any:
-            if isinstance(obj, dict):
-                if '$ref' in obj:
-                    ref = obj.pop('$ref')
-                    ref_name = ref.split('/')[-1]
-                    if ref_name in defs:
-                        # Replace the reference with the actual definition
-                        resolved = defs[ref_name].copy()
-                        # Merge any additional properties from the reference
-                        for key, value in obj.items():
-                            if key != '$ref':
-                                resolved[key] = value
-                        return resolve_refs(resolved)
-                    return obj
-                else:
-                    # Recursively process all dictionary values
-                    return {k: resolve_refs(v) for k, v in obj.items()}
-            elif isinstance(obj, list):
-                return [resolve_refs(item) for item in obj]
-            return obj
-
-        schema = resolve_refs(schema)
-
-    # Remove unsupported properties
-    def clean_schema(obj: Any) -> Any:
-        if isinstance(obj, dict):
-            # Remove unsupported properties
-            cleaned = {}
-            for key, value in obj.items():
-                if key not in ['additionalProperties', 'title', 'default']:
-                    cleaned_value = clean_schema(value)
-                    # Handle empty object properties - Gemini doesn't allow empty OBJECT types
-                    if (
-                        key == 'properties'
-                        and isinstance(cleaned_value, dict)
-                        and len(cleaned_value) == 0
-                        and isinstance(obj.get('type', ''), str)
-                        and obj.get('type', '').upper() == 'OBJECT'
-                    ):
-                        # Convert empty object to have at least one property
-                        cleaned['properties'] = {'_placeholder': {'type': 'string'}}
-                    else:
-                        cleaned[key] = cleaned_value
-
-            # If this is an object type with empty properties, add a placeholder
-            if (
-                isinstance(cleaned.get('type', ''), str)
-                and cleaned.get('type', '').upper() == 'OBJECT'
-                and 'properties' in cleaned
-                and isinstance(cleaned['properties'], dict)
-                and len(cleaned['properties']) == 0
-            ):
-                cleaned['properties'] = {'_placeholder': {'type': 'string'}}
-
-            # PATCH: Also remove 'title' from the required list if it exists
-            if 'required' in cleaned and isinstance(cleaned.get('required'), list):
-                cleaned['required'] = [p for p in cleaned['required'] if p != 'title']
-
-            return cleaned
-        elif isinstance(obj, list):
-            return [clean_schema(item) for item in obj]
-        return obj
-
-    return clean_schema(schema)
-
-def apply():
-    """Applies the monkey-patch to ChatGoogle."""
-    ChatGoogle._fix_gemini_schema = _patched_fix_gemini_schema
diff --git a/python/helpers/mcp_handler.py b/python/helpers/mcp_handler.py
index 1a16acb49e..2c44ded817 100644
--- a/python/helpers/mcp_handler.py
+++ b/python/helpers/mcp_handler.py
@@ -1112,4 +1112,4 @@ def get_session_id(self) -> Optional[str]:
         """Get the current session ID if available (for streaming HTTP clients)."""
         if self.session_id_callback is not None:
             return self.session_id_callback()
-        return None
+        return None
\ No newline at end of file
diff --git a/python/helpers/mcp_server.py b/python/helpers/mcp_server.py
index 4c080da69c..0cbce8e6e2 100644
--- a/python/helpers/mcp_server.py
+++ b/python/helpers/mcp_server.py
@@ -430,4 +430,4 @@ async def mcp_middleware(request: Request, call_next):
             status_code=403, detail="MCP server is disabled in settings."
         )
 
-    return await call_next(request)
+    return await call_next(request)
\ No newline at end of file
diff --git a/python/helpers/playwright.py b/python/helpers/playwright.py
index 34f851ab63..9ce743e37e 100644
--- a/python/helpers/playwright.py
+++ b/python/helpers/playwright.py
@@ -1,6 +1,8 @@
 
 from pathlib import Path
 import subprocess
+import sys
+import platform
 from python.helpers import files
 
 
@@ -8,24 +10,98 @@
 # should work for both docker and local installation
 
 def get_playwright_binary():
+    """Return the path of a Chromium binary in the Playwright cache, or None.
+
+    The full Chromium browser (usable headed or headless) is preferred; the
+    headless shell is returned only when no full browser is found.
+
+    Only the layout for the OS reported by platform.system() is searched, so a
+    binary built for another OS (e.g. macOS binary in Linux Docker) is never returned.
+    """
     pw_cache = Path(get_playwright_cache_dir())
-    headless_shell = next(pw_cache.glob("chromium_headless_shell-*/chrome-*/headless_shell"), None)
+
+    # (full browser, headless shell) glob patterns, keyed by platform.system()
+    binary_globs = {
+        "Darwin": (  # macOS
+            "chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium",
+            "chromium_headless_shell-*/chrome-mac/headless_shell",
+        ),
+        "Linux": (
+            "chromium-*/chrome-linux/chrome",
+            "chromium_headless_shell-*/chrome-linux/headless_shell",
+        ),
+        "Windows": (
+            "chromium-*/chrome-win/chrome.exe",
+            "chromium_headless_shell-*/chrome-win/headless_shell.exe",
+        ),
+    }
+    os_globs = binary_globs.get(platform.system())
+    if os_globs is None:  # unrecognized OS: nothing to search for
+        return None
+
+    # Prefer the full browser; the headless shell cannot show a GUI
+    full_browser = next(pw_cache.glob(os_globs[0]), None)
+    if full_browser:
+        return full_browser
+    headless_shell = next(pw_cache.glob(os_globs[1]), None)
     return headless_shell
 
 def get_playwright_cache_dir():
     return files.get_abs_path("tmp/playwright")
 
 def ensure_playwright_binary():
+    """Ensure a Chromium binary for the current OS exists in the Playwright cache.
+
+    Nothing is installed when one is already found. Otherwise wrong-platform browser
+    directories (full Chromium and headless shell) are removed and full Chromium is
+    installed, falling back to the headless shell only if that installation fails.
+    Raises Exception when no binary can be found after installation.
+    """
+    import os
+    import shutil
+
     bin = get_playwright_binary()
     if not bin:
         cache = get_playwright_cache_dir()
-        import os
+        pw_cache = Path(cache)
+
+        # Remove wrong-platform browser dirs; "chromium*-*" covers chromium-* and chromium_headless_shell-*
+        system = platform.system()
+        wrong_platform_dirs = []
+
+        if system != "Darwin":  # Not macOS - remove macOS binaries
+            wrong_platform_dirs.extend(pw_cache.glob("chromium*-*/chrome-mac"))
+        if system != "Linux":  # Not Linux - remove Linux binaries
+            wrong_platform_dirs.extend(pw_cache.glob("chromium*-*/chrome-linux"))
+        if system != "Windows":  # Not Windows - remove Windows binaries
+            wrong_platform_dirs.extend(pw_cache.glob("chromium*-*/chrome-win"))
+
+        for wrong_dir in wrong_platform_dirs:
+            print(f"Removing wrong-platform binary: {wrong_dir}")
+            # Remove the entire versioned browser directory, not just the platform subdirectory
+            chromium_dir = wrong_dir.parent
+            if chromium_dir.exists():
+                shutil.rmtree(chromium_dir)
+
         env = os.environ.copy()
         env["PLAYWRIGHT_BROWSERS_PATH"] = cache
-        subprocess.check_call(
-            ["playwright", "install", "chromium", "--only-shell"],
-            env=env
-        )
+
+        # Install full Chromium browser (supports both visible and headless modes)
+        print(f"Installing Playwright Chromium browser for {system} (supports visible mode)...")
+        try:
+            subprocess.check_call(
+                [sys.executable, "-m", "playwright", "install", "chromium"],
+                env=env
+            )
+        except subprocess.CalledProcessError as e:
+            print(f"Failed to install full Chromium: {e}")
+            print("Falling back to headless shell (headless-only)...")
+            # Fallback: install headless shell only
+            subprocess.check_call(
+                [sys.executable, "-m", "playwright", "install", "chromium", "--only-shell"],
+                env=env
+            )
+
     bin = get_playwright_binary()
     if not bin:
         raise Exception("Playwright binary not found after installation")
diff --git a/python/helpers/settings.py b/python/helpers/settings.py
index d882de94c9..a7a790d436 100644
--- a/python/helpers/settings.py
+++ b/python/helpers/settings.py
@@ -46,14 +46,6 @@ class Settings(TypedDict):
     embed_model_rl_requests: int
     embed_model_rl_input: int
 
-    browser_model_provider: str
-    browser_model_name: str
-    browser_model_api_base: str
-    browser_model_vision: bool
-    browser_model_rl_requests: int
-    browser_model_rl_input: int
-    browser_model_rl_output: int
-    browser_model_kwargs: dict[str, Any]
     browser_http_headers: dict[str, Any]
 
     agent_profile: str
@@ -429,106 +421,6 @@ def convert_out(settings: Settings) -> SettingsOutput:
         "tab": "agent",
     }
 
-    # embedding model section
-    browser_model_fields: list[SettingsField] = []
-    browser_model_fields.append(
-        {
-            "id": "browser_model_provider",
-            "title": "Web Browser model provider",
-            "description": "Select provider for web browser model used by <a href='https://github.com/browser-use/browser-use' target='_blank'>browser-use</a> framework",
-            "type": "select",
-            "value": settings["browser_model_provider"],
-            "options": cast(list[FieldOption], get_providers("chat")),
-        }
-    )
-    browser_model_fields.append(
-        {
-            "id": "browser_model_name",
-            "title": "Web Browser model name",
-            "description": "Exact name of model from selected provider",
-            "type": "text",
-            "value": settings["browser_model_name"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_api_base",
-            "title": "Web Browser model API base URL",
-            "description": "API base URL for web browser model. Leave empty for default. Only relevant for Azure, local and custom (other) providers.",
-            "type": "text",
-            "value": settings["browser_model_api_base"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_vision",
-            "title": "Use Vision",
-            "description": "Models capable of Vision can use it to analyze web pages from screenshots. Increases quality but also token usage.",
-            "type": "switch",
-            "value": settings["browser_model_vision"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_rl_requests",
-            "title": "Web Browser model rate limit requests",
-            "description": "Rate limit requests for web browser model.",
-            "type": "number",
-            "value": settings["browser_model_rl_requests"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_rl_input",
-            "title": "Web Browser model rate limit input",
-            "description": "Rate limit input for web browser model.",
-            "type": "number",
-            "value": settings["browser_model_rl_input"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_rl_output",
-            "title": "Web Browser model rate limit output",
-            "description": "Rate limit output for web browser model.",
-            "type": "number",
-            "value": settings["browser_model_rl_output"],
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_model_kwargs",
-            "title": "Web Browser model additional parameters",
-            "description": "Any other parameters supported by <a href='https://docs.litellm.ai/docs/set_keys' target='_blank'>LiteLLM</a>. Format is KEY=VALUE on individual lines, like .env file. Value can also contain JSON objects - when unquoted, it is treated as object, number etc., when quoted, it is treated as string.",
-            "type": "textarea",
-            "value": _dict_to_env(settings["browser_model_kwargs"]),
-        }
-    )
-
-    browser_model_fields.append(
-        {
-            "id": "browser_http_headers",
-            "title": "HTTP Headers",
-            "description": "HTTP headers to include with all browser requests. Format is KEY=VALUE on individual lines, like .env file. Value can also contain JSON objects - when unquoted, it is treated as object, number etc., when quoted, it is treated as string. Example: Authorization=Bearer token123",
-            "type": "textarea",
-            "value": _dict_to_env(settings.get("browser_http_headers", {})),
-        }
-    )
-
-    browser_model_section: SettingsSection = {
-        "id": "browser_model",
-        "title": "Web Browser Model",
-        "description": "Settings for the web browser model. Agent Zero uses <a href='https://github.com/browser-use/browser-use' target='_blank'>browser-use</a> agentic framework to handle web interactions.",
-        "fields": browser_model_fields,
-        "tab": "agent",
-    }
-
     # basic auth section
     auth_fields: list[SettingsField] = []
 
@@ -1257,7 +1149,6 @@ def convert_out(settings: Settings) -> SettingsOutput:
             agent_section,
             chat_model_section,
             util_model_section,
-            browser_model_section,
             embed_model_section,
             memory_section,
             speech_section,
@@ -1451,14 +1342,6 @@ def get_default_settings() -> Settings:
         embed_model_kwargs={},
         embed_model_rl_requests=0,
         embed_model_rl_input=0,
-        browser_model_provider="openrouter",
-        browser_model_name="openai/gpt-4.1",
-        browser_model_api_base="",
-        browser_model_vision=True,
-        browser_model_rl_requests=0,
-        browser_model_rl_input=0,
-        browser_model_rl_output=0,
-        browser_model_kwargs={"temperature": "0"},
         browser_http_headers={},
         memory_recall_enabled=True,
         memory_recall_delayed=False,
diff --git a/python/helpers/vector_db.py b/python/helpers/vector_db.py
index 2b94960e31..c68c517d17 100644
--- a/python/helpers/vector_db.py
+++ b/python/helpers/vector_db.py
@@ -147,4 +147,4 @@ def comparator(data: dict[str, Any]):
             # PrintStyle.error(f"Error evaluating condition: {e}")
             return False
 
-    return comparator
+    return comparator
\ No newline at end of file
diff --git a/python/tools/browser_agent.py b/python/tools/browser_agent.py
deleted file mode 100644
index 6d5f085b26..0000000000
--- a/python/tools/browser_agent.py
+++ /dev/null
@@ -1,428 +0,0 @@
-import asyncio
-import time
-from typing import Optional, cast
-from agent import Agent, InterventionException
-from pathlib import Path
-
-from python.helpers.tool import Tool, Response
-from python.helpers import files, defer, persist_chat, strings
-from python.helpers.browser_use import browser_use  # type: ignore[attr-defined]
-from python.helpers.print_style import PrintStyle
-from python.helpers.playwright import ensure_playwright_binary
-from python.helpers.secrets import get_secrets_manager
-from python.extensions.message_loop_start._10_iteration_no import get_iter_no
-from pydantic import BaseModel
-import uuid
-from python.helpers.dirty_json import DirtyJson
-
-
-class State:
-    @staticmethod
-    async def create(agent: Agent):
-        state = State(agent)
-        return state
-
-    def __init__(self, agent: Agent):
-        self.agent = agent
-        self.browser_session: Optional[browser_use.BrowserSession] = None
-        self.task: Optional[defer.DeferredTask] = None
-        self.use_agent: Optional[browser_use.Agent] = None
-        self.secrets_dict: Optional[dict[str, str]] = None
-        self.iter_no = 0
-
-    def __del__(self):
-        self.kill_task()
-        files.delete_dir(self.get_user_data_dir()) # cleanup user data dir
-
-    def get_user_data_dir(self):
-        return str(
-            Path.home()
-            / ".config"
-            / "browseruse"
-            / "profiles"
-            / f"agent_{self.agent.context.id}"
-        )
-
-    async def _initialize(self):
-        if self.browser_session:
-            return
-
-        # for some reason we need to provide exact path to headless shell, otherwise it looks for headed browser
-        pw_binary = ensure_playwright_binary()
-                
-        self.browser_session = browser_use.BrowserSession(
-            browser_profile=browser_use.BrowserProfile(
-                headless=True,
-                disable_security=True,
-                chromium_sandbox=False,
-                accept_downloads=True,
-                downloads_path=files.get_abs_path("tmp/downloads"),
-                allowed_domains=["*", "http://*", "https://*"],
-                executable_path=pw_binary,
-                keep_alive=True,
-                minimum_wait_page_load_time=1.0,
-                wait_for_network_idle_page_load_time=2.0,
-                maximum_wait_page_load_time=10.0,
-                window_size={"width": 1024, "height": 2048},
-                screen={"width": 1024, "height": 2048},
-                viewport={"width": 1024, "height": 2048},
-                no_viewport=False,
-                args=["--headless=new"],
-                # Use a unique user data directory to avoid conflicts
-                user_data_dir=self.get_user_data_dir(),
-                extra_http_headers=self.agent.config.browser_http_headers or {},
-                )
-        )
-
-        await self.browser_session.start() if self.browser_session else None
-        # self.override_hooks()
-
-        # --------------------------------------------------------------------------
-        # Patch to enforce vertical viewport size
-        # --------------------------------------------------------------------------
-        # Browser-use auto-configuration overrides viewport settings, causing wrong
-        # aspect ratio. We fix this by directly setting viewport size after startup.
-        # --------------------------------------------------------------------------
-
-        if self.browser_session:
-            try:
-                page = await self.browser_session.get_current_page()
-                if page:
-                    await page.set_viewport_size({"width": 1024, "height": 2048})
-            except Exception as e:
-                PrintStyle().warning(f"Could not force set viewport size: {e}")
-
-        # --------------------------------------------------------------------------    
-        
-        # Add init script to the browser session
-        if self.browser_session and self.browser_session.browser_context:
-            js_override = files.get_abs_path("lib/browser/init_override.js")
-            await self.browser_session.browser_context.add_init_script(path=js_override) if self.browser_session else None
-
-    def start_task(self, task: str):
-        if self.task and self.task.is_alive():
-            self.kill_task()
-
-        self.task = defer.DeferredTask(
-            thread_name="BrowserAgent" + self.agent.context.id
-        )
-        if self.agent.context.task:
-            self.agent.context.task.add_child_task(self.task, terminate_thread=True)
-        self.task.start_task(self._run_task, task) if self.task else None
-        return self.task
-
-    def kill_task(self):
-        if self.task:
-            self.task.kill(terminate_thread=True)
-            self.task = None
-        if self.browser_session:
-            try:
-                import asyncio
-
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-                loop.run_until_complete(self.browser_session.close()) if self.browser_session else None
-                loop.close()
-            except Exception as e:
-                PrintStyle().error(f"Error closing browser session: {e}")
-            finally:
-                self.browser_session = None
-        self.use_agent = None
-        self.iter_no = 0
-
-    async def _run_task(self, task: str):
-        await self._initialize()
-
-        class DoneResult(BaseModel):
-            title: str
-            response: str
-            page_summary: str
-
-        # Initialize controller
-        controller = browser_use.Controller(output_model=DoneResult)
-
-        # Register custom completion action with proper ActionResult fields
-        @controller.registry.action("Complete task", param_model=DoneResult)
-        async def complete_task(params: DoneResult):
-            result = browser_use.ActionResult(
-                is_done=True, success=True, extracted_content=params.model_dump_json()
-            )
-            return result
-
-        model = self.agent.get_browser_model()
-
-        try:
-
-            secrets_manager = get_secrets_manager(self.agent.context)
-            secrets_dict = secrets_manager.load_secrets()
-
-            self.use_agent = browser_use.Agent(
-                task=task,
-                browser_session=self.browser_session,
-                llm=model,
-                use_vision=self.agent.config.browser_model.vision,
-                extend_system_message=self.agent.read_prompt(
-                    "prompts/browser_agent.system.md"
-                ),
-                controller=controller,
-                enable_memory=False,  # Disable memory to avoid state conflicts
-                llm_timeout=3000, # TODO rem
-                sensitive_data=cast(dict[str, str | dict[str, str]] | None, secrets_dict or {}),  # Pass secrets
-            )
-        except Exception as e:
-            raise Exception(
-                f"Browser agent initialization failed. This might be due to model compatibility issues. Error: {e}"
-            ) from e
-
-        self.iter_no = get_iter_no(self.agent)
-
-        async def hook(agent: browser_use.Agent):
-            await self.agent.wait_if_paused()
-            if self.iter_no != get_iter_no(self.agent):
-                raise InterventionException("Task cancelled")
-
-        # try:
-        result = None
-        if self.use_agent:
-            result = await self.use_agent.run(
-                max_steps=50, on_step_start=hook, on_step_end=hook
-            )
-        return result
-
-    async def get_page(self):
-        if self.use_agent and self.browser_session:
-            try:
-                return await self.use_agent.browser_session.get_current_page() if self.use_agent.browser_session else None
-            except Exception:
-                # Browser session might be closed or invalid
-                return None
-        return None
-
-    async def get_selector_map(self):
-        """Get the selector map for the current page state."""
-        if self.use_agent:
-            await self.use_agent.browser_session.get_state_summary(cache_clickable_elements_hashes=True) if self.use_agent.browser_session else None
-            return await self.use_agent.browser_session.get_selector_map() if self.use_agent.browser_session else None
-            await self.use_agent.browser_session.get_state_summary(
-                cache_clickable_elements_hashes=True
-            )
-            return await self.use_agent.browser_session.get_selector_map()
-        return {}
-
-
-class BrowserAgent(Tool):
-
-    async def execute(self, message="", reset="", **kwargs):
-        self.guid = self.agent.context.generate_id() # short random id
-        reset = str(reset).lower().strip() == "true"
-        await self.prepare_state(reset=reset)
-        message = get_secrets_manager(self.agent.context).mask_values(message, placeholder="<secret>{key}</secret>") # mask any potential passwords passed from A0 to browser-use to browser-use format
-        task = self.state.start_task(message) if self.state else None
-
-        # wait for browser agent to finish and update progress with timeout
-        timeout_seconds = 300  # 5 minute timeout
-        start_time = time.time()
-
-        fail_counter = 0
-        while not task.is_ready() if task else False:
-            # Check for timeout to prevent infinite waiting
-            if time.time() - start_time > timeout_seconds:
-                PrintStyle().warning(
-                    self._mask(f"Browser agent task timeout after {timeout_seconds} seconds, forcing completion")
-                )
-                break
-
-            await self.agent.handle_intervention()
-            await asyncio.sleep(1)
-            try:
-                if task and task.is_ready():  # otherwise get_update hangs
-                    break
-                try:
-                    update = await asyncio.wait_for(self.get_update(), timeout=10)
-                    fail_counter = 0  # reset on success
-                except asyncio.TimeoutError:
-                    fail_counter += 1
-                    PrintStyle().warning(
-                        self._mask(f"browser_agent.get_update timed out ({fail_counter}/3)")
-                    )
-                    if fail_counter >= 3:
-                        PrintStyle().warning(
-                            self._mask("3 consecutive browser_agent.get_update timeouts, breaking loop")
-                        )
-                        break
-                    continue
-                update_log = update.get("log", get_use_agent_log(None))
-                self.update_progress("\n".join(update_log))
-                screenshot = update.get("screenshot", None)
-                if screenshot:
-                    self.log.update(screenshot=screenshot)
-            except Exception as e:
-                PrintStyle().error(self._mask(f"Error getting update: {str(e)}"))
-
-        if task and not task.is_ready():
-            PrintStyle().warning(self._mask("browser_agent.get_update timed out, killing the task"))
-            self.state.kill_task() if self.state else None
-            return Response(
-                message=self._mask("Browser agent task timed out, not output provided."),
-                break_loop=False,
-            )
-
-        # final progress update
-        if self.state and self.state.use_agent:
-            log_final = get_use_agent_log(self.state.use_agent)
-            self.update_progress("\n".join(log_final))
-
-        # collect result with error handling
-        try:
-            result = await task.result() if task else None
-        except Exception as e:
-            PrintStyle().error(self._mask(f"Error getting browser agent task result: {str(e)}"))
-            # Return a timeout response if task.result() fails
-            answer_text = self._mask(f"Browser agent task failed to return result: {str(e)}")
-            self.log.update(answer=answer_text)
-            return Response(message=answer_text, break_loop=False)
-        # finally:
-        #     # Stop any further browser access after task completion
-        #     # self.state.kill_task()
-        #     pass
-
-        # Check if task completed successfully
-        if result and result.is_done():
-            answer = result.final_result()
-            try:
-                if answer and isinstance(answer, str) and answer.strip():
-                    answer_data = DirtyJson.parse_string(answer)
-                    answer_text = strings.dict_to_text(answer_data)  # type: ignore
-                else:
-                    answer_text = (
-                        str(answer) if answer else "Task completed successfully"
-                    )
-            except Exception as e:
-                answer_text = (
-                    str(answer)
-                    if answer
-                    else f"Task completed with parse error: {str(e)}"
-                )
-        else:
-            # Task hit max_steps without calling done()
-            urls = result.urls() if result else []
-            current_url = urls[-1] if urls else "unknown"
-            answer_text = (
-                f"Task reached step limit without completion. Last page: {current_url}. "
-                f"The browser agent may need clearer instructions on when to finish."
-            )
-
-        # Mask answer for logs and response
-        answer_text = self._mask(answer_text)
-
-        # update the log (without screenshot path here, user can click)
-        self.log.update(answer=answer_text)
-
-        # add screenshot to the answer if we have it
-        if (
-            self.log.kvps
-            and "screenshot" in self.log.kvps
-            and self.log.kvps["screenshot"]
-        ):
-            path = self.log.kvps["screenshot"].split("//", 1)[-1].split("&", 1)[0]
-            answer_text += f"\n\nScreenshot: {path}"
-
-        # respond (with screenshot path)
-        return Response(message=answer_text, break_loop=False)
-
-    def get_log_object(self):
-        return self.agent.context.log.log(
-            type="browser",
-            heading=f"icon://captive_portal {self.agent.agent_name}: Calling Browser Agent",
-            content="",
-            kvps=self.args,
-        )
-
-    async def get_update(self):
-        await self.prepare_state()
-
-        result = {}
-        agent = self.agent
-        ua = self.state.use_agent if self.state else None
-        page = await self.state.get_page() if self.state else None
-
-        if ua and page:
-            try:
-
-                async def _get_update():
-
-                    # await agent.wait_if_paused() # no need here
-
-                    # Build short activity log
-                    result["log"] = get_use_agent_log(ua)
-
-                    path = files.get_abs_path(
-                        persist_chat.get_chat_folder_path(agent.context.id),
-                        "browser",
-                        "screenshots",
-                        f"{self.guid}.png",
-                    )
-                    files.make_dirs(path)
-                    await page.screenshot(path=path, full_page=False, timeout=3000)
-                    result["screenshot"] = f"img://{path}&t={str(time.time())}"
-
-                if self.state and self.state.task and not self.state.task.is_ready():
-                    await self.state.task.execute_inside(_get_update)
-
-            except Exception:
-                pass
-
-        return result
-
-    async def prepare_state(self, reset=False):
-        self.state = self.agent.get_data("_browser_agent_state")
-        if reset and self.state:
-            self.state.kill_task()
-        if not self.state or reset:
-            self.state = await State.create(self.agent)
-        self.agent.set_data("_browser_agent_state", self.state)
-
-    def update_progress(self, text):
-        text = self._mask(text)
-        short = text.split("\n")[-1]
-        if len(short) > 50:
-            short = short[:50] + "..."
-        progress = f"Browser: {short}"
-
-        self.log.update(progress=text)
-        self.agent.context.log.set_progress(progress)
-
-    def _mask(self, text: str) -> str:
-        try:
-            return get_secrets_manager(self.agent.context).mask_values(text or "")
-        except Exception as e:
-            return text or ""
-
-    # def __del__(self):
-    #     if self.state:
-    #         self.state.kill_task()
-
-
-def get_use_agent_log(use_agent: browser_use.Agent | None):
-    result = ["🚦 Starting task"]
-    if use_agent:
-        action_results = use_agent.history.action_results() or []
-        short_log = []
-        for item in action_results:
-            # final results
-            if item.is_done:
-                if item.success:
-                    short_log.append("✅ Done")
-                else:
-                    short_log.append(
-                        f"❌ Error: {item.error or item.extracted_content or 'Unknown error'}"
-                    )
-
-            # progress messages
-            else:
-                text = item.extracted_content
-                if text:
-                    first_line = text.split("\n", 1)[0][:200]
-                    short_log.append(first_line)
-        result.extend(short_log)
-    return result
diff --git a/python/tools/browser_control.py b/python/tools/browser_control.py
new file mode 100644
index 0000000000..98e5d7cf9b
--- /dev/null
+++ b/python/tools/browser_control.py
@@ -0,0 +1,534 @@
+"""
+Browser Control Tool - Granular browser control with individual actions.
+
+This tool provides precise browser automation through individual action methods
+(navigate, click, type, scroll, observe_page, etc.) following Agent Zero's
+tool-based architecture.
+"""
+
+import asyncio
+import time
+from typing import Optional
+from dataclasses import dataclass
+from agent import Agent, InterventionException
+from pathlib import Path
+
+from python.helpers.tool import Tool, Response
+from python.helpers import files, persist_chat
+from python.helpers.print_style import PrintStyle
+from python.helpers.playwright import ensure_playwright_binary
+from python.helpers.browser_control_client import (
+    PlaywrightClient,
+    BrowserControlState,
+    Action,
+    ActionType,
+    ActionResult,
+)
+
+
+class BrowserControl(Tool):
+    """
+    Browser Control tool for granular browser control.
+
+    Provides individual action methods for precise web automation.
+    """
+    
+    async def execute(self, **kwargs) -> Response:
+        """
+        Run one browser control action selected by the tool's method name.
+
+        Falls back to observe_page when no method is given; reset="true" makes
+        prepare_state start from a fresh browser session. A screenshot is taken
+        after the action, and any Exception raised by the action or screenshot
+        step is returned as an error message instead of propagating.
+        """
+        await self.agent.handle_intervention()
+
+        # Fresh id per call; used as the screenshot file name
+        self.guid = self.agent.context.generate_id()
+
+        method = self.method or "observe_page"
+        wants_reset = str(kwargs.get("reset", "false")).lower() == "true"
+
+        # Initialize/retrieve state
+        await self.prepare_state(reset=wants_reset)
+
+        arg = kwargs.get
+        # One lazily-evaluated handler per supported method name
+        handlers = {
+            "navigate": lambda: self._navigate(arg("url")),
+            "click": lambda: self._click(arg("selector")),
+            "type": lambda: self._type(arg("selector"), arg("text")),
+            "scroll": lambda: self._scroll(arg("direction", "down")),
+            "observe_page": lambda: self._observe_page(),
+            "select": lambda: self._select(arg("selector"), arg("value")),
+            "press": lambda: self._press(arg("selector"), arg("key")),
+            "hover": lambda: self._hover(arg("selector")),
+            "pause_for_user": lambda: self._pause_for_user(
+                arg("wait_seconds", 60),
+                arg("message", "Pausing for user interaction..."),
+            ),
+            "get_browser_info": lambda: self._get_browser_info(),
+        }
+
+        # Route to specific method
+        outcome = None
+        try:
+            handler = handlers.get(method)
+            if handler is not None:
+                outcome = await handler()
+            else:
+                outcome = f"Unknown method: {method}. Available methods: navigate, click, type, scroll, observe_page, select, press, hover, pause_for_user, get_browser_info"
+
+            # Screenshot for the UI, taken whatever the action returned
+            await self._capture_screenshot(method)
+        except Exception as e:
+            outcome = f"Error executing {method}: {str(e)}"
+            PrintStyle().error(outcome)
+
+        if not outcome:
+            outcome = f"Method {method} completed but returned no output"
+
+        return Response(message=outcome, break_loop=False)
+    
+    async def prepare_state(self, reset: bool = False) -> Optional[BrowserControlState]:
+        """
+        Return the agent's BrowserControlState, creating (or, with reset=True,
+        closing and re-creating) the Playwright session as needed; the state
+        is cached on the agent under "_browser_control_state".
+        """
+        self.state: Optional[BrowserControlState] = self.agent.get_data("_browser_control_state")
+
+        if reset and self.state and self.state.client:
+            # Close existing session; a failed close only logs a warning
+            try:
+                await self.state.client.close()
+            except Exception as e:
+                PrintStyle().warning(f"Error closing existing session: {e}")
+            self.state = None
+        
+        if not self.state or not self.state.initialized:
+            # Create new Playwright session; this import is only an availability check
+            try:
+                from playwright.async_api import async_playwright
+            except ImportError:
+                raise ImportError(
+                    "Playwright is not installed. Install with: pip install playwright"
+                )
+            
+            # Get Playwright binary path (only needed if not using CDP)
+            cdp_url = self.agent.config.browser_control_cdp_url
+            pw_binary = None if cdp_url else ensure_playwright_binary()
+
+            # VNC is used only when the status file exists and no CDP URL is set
+            import os
+            use_vnc = os.path.exists('/tmp/vnc/status') and not cdp_url
+            vnc_display = os.environ.get('VNC_DISPLAY', ':99') if use_vnc else None
+
+            # Create client (empty cdp_url / missing binary are passed as None)
+            client = PlaywrightClient(
+                start_url=self.agent.config.browser_control_start_url,
+                headless=self.agent.config.browser_control_headless,
+                playwright_binary=str(pw_binary) if pw_binary else None,
+                cdp_url=cdp_url if cdp_url else None,
+                use_vnc=use_vnc,
+                vnc_display=vnc_display
+            )
+            
+            # Initialize browser
+            await client.initialize()
+            
+            # Create state from the handles the initialized client exposes
+            self.state = BrowserControlState(
+                playwright=client.playwright,
+                browser=client.browser,
+                context=client.context,
+                page=client.page,
+                client=client,
+                initialized=True
+            )
+            
+            self.agent.set_data("_browser_control_state", self.state)
+
+        return self.state
+
+    async def _get_state(self) -> Optional[BrowserControlState]:
+        """Fetch the browser state stored on the agent under "_browser_control_state"."""
+        return self.agent.get_data("_browser_control_state")
+    
+    async def _navigate(self, url: Optional[str]) -> str:
+        """Issue a NAVIGATE action for `url`; returns the client's
+        description on success, otherwise an error message string."""
+        if not url:
+            return "Error: URL is required for navigate action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.NAVIGATE, value=url)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Navigation failed: {outcome.error}"
+    
+    async def _click(self, selector: Optional[str]) -> str:
+        """Issue a CLICK action for `selector`; returns the client's
+        description on success, otherwise an error message string."""
+        if not selector:
+            return "Error: Selector is required for click action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.CLICK, selector=selector)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Click failed: {outcome.error}. Try a different selector or text content."
+    
+    async def _type(self, selector: Optional[str], text: Optional[str]) -> str:
+        """Issue a TYPE action putting `text` into `selector`; returns the
+        client's description on success, otherwise an error message string."""
+        if not selector:
+            return "Error: Selector is required for type action"
+        if not text:
+            return "Error: Text is required for type action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.TYPE, selector=selector, value=text)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Type failed: {outcome.error}"
+    
+    async def _scroll(self, direction: str = "down") -> str:
+        """Issue a SCROLL action with `direction` as its value; returns the
+        client's description on success, otherwise an error message string."""
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.SCROLL, value=direction)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Scroll failed: {outcome.error}"
+    
+    async def _select(self, selector: Optional[str], value: Optional[str]) -> str:
+        """Issue a SELECT action choosing `value` on `selector`; returns the
+        client's description on success, otherwise an error message string."""
+        if not selector:
+            return "Error: Selector is required for select action"
+        if not value:
+            return "Error: Value is required for select action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.SELECT, selector=selector, value=value)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Select failed: {outcome.error}"
+    
+    async def _press(self, selector: Optional[str], key: Optional[str]) -> str:
+        """Issue a PRESS action sending `key` to `selector`; returns the
+        client's description on success, otherwise an error message string."""
+        if not selector:
+            return "Error: Selector is required for press action"
+        if not key:
+            return "Error: Key is required for press action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.PRESS, selector=selector, value=key)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Press failed: {outcome.error}"
+    
+    async def _hover(self, selector: Optional[str]) -> str:
+        """Issue a HOVER action for `selector`; returns the client's
+        description on success, otherwise an error message string."""
+        if not selector:
+            return "Error: Selector is required for hover action"
+
+        browser_state = await self._get_state()
+        client = browser_state.client if browser_state else None
+        if not client:
+            return "Error: Browser not initialized"
+
+        request = Action(action_type=ActionType.HOVER, selector=selector)
+        outcome = await client.execute_action(request)
+        if outcome.success:
+            return outcome.description
+        return f"Hover failed: {outcome.error}"
+    
+    async def _observe_page(self) -> str:
+        """
+        Describe the current page as text: URL, title, up to 8 headings, up to
+        5000 characters of main content, and up to 10 labelled interactive
+        elements. When the chat model has vision, a base64 screenshot is also
+        added to the agent history. Failures come back as an error string.
+        """
+        state = await self._get_state()
+        if not state or not state.client or not state.client.page:
+            return "Error: Browser not initialized"
+        
+        page = state.client.page
+        
+        # Build description of the page
+        try:
+            description = f"URL: {page.url}\n"
+            description += f"Title: {await page.title()}\n\n"
+            
+            # Extract semantic content in the page itself (script runs in the browser)
+            content_data = await page.evaluate(
+                """
+                () => {
+                    // Extract headings
+                    const headings = Array.from(document.querySelectorAll('h1, h2, h3'))
+                        .slice(0, 10)
+                        .map(h => `${h.tagName}: ${h.innerText.trim()}`)
+                        .filter(h => h.length > 5);
+                    
+                    // Extract article content or main content
+                    let mainText = '';
+                    const article = document.querySelector('article, main, [role="main"]');
+                    if (article) {
+                        mainText = article.innerText.substring(0, 5000);
+                    } else {
+                        mainText = document.body.innerText.substring(0, 5000);
+                    }
+                    
+                    return {
+                        headings: headings,
+                        text: mainText
+                    };
+                }
+                """
+            )
+            
+            # Format the content (only the first 8 of the returned headings)
+            if content_data.get("headings"):
+                description += "Key headings:\n"
+                for heading in content_data["headings"][:8]:
+                    description += f"  {heading}\n"
+                description += "\n"
+            
+            if content_data.get("text"):
+                description += f"Page content:\n{content_data['text']}\n\n"
+            
+            # Get interactive elements
+            interface_state = await state.client.get_state("text")
+            if interface_state.interactive_elements:
+                description += f"Interactive elements: {len(interface_state.interactive_elements)} found\n"
+                description += "Key elements:\n"
+                # Of the first 15 elements, keep those whose text is longer than 2 chars
+                meaningful_elements = [
+                    elem
+                    for elem in interface_state.interactive_elements[:15]
+                    if elem.get("text", "").strip()
+                    and len(elem.get("text", "").strip()) > 2
+                ]
+                for elem in meaningful_elements[:10]:
+                    text = elem.get("text", "").strip()[:50]
+                    tag = elem.get("tag", "")
+                    if text:
+                        description += f"  - {tag}: {text}\n"
+            
+            # Add screenshot to agent history for vision model analysis
+            if self.agent.config.chat_model.vision:
+                try:
+                    screenshot_b64 = await state.client.get_screenshot_base64()
+                    # Add to history as multimodal content
+                    self.agent.hist_add_message(
+                        False,  # Not user message
+                        content={
+                            "role": "user",
+                            "type": "image",
+                            "image": screenshot_b64,
+                            "description": f"Screenshot of current page: {page.url}"
+                        }
+                    )
+                except Exception as e:
+                    # Screenshot is optional: the text description is still returned
+                    PrintStyle().warning(f"Could not add screenshot to context: {e}")
+            
+            return description
+
+        except Exception as e:
+            return f"Error extracting page content: {str(e)}"
+
+    async def _pause_for_user(self, wait_seconds: int = 60, message: str = "Pausing for user interaction...") -> str:
+        """
+        Pause execution to allow user to manually interact with the browser.
+
+        Useful for CAPTCHAs, logins blocked for automation, cookie/terms
+        dialogs, or any other step that needs manual intervention.
+
+        Args:
+            wait_seconds: How long to wait for user interaction (default 60
+                seconds). Tool arguments can arrive as strings, so the value
+                is coerced to a non-negative whole number; unparsable input
+                falls back to 60.
+            message: Custom message to display to user
+
+        Note: If VNC is enabled, a URL will be provided for browser access.
+        """
+        state = await self._get_state()
+        if not state or not state.client:
+            return "Error: Browser not initialized"
+
+        # asyncio.sleep() raises TypeError for str input, so normalise first
+        try:
+            wait_seconds = max(0, round(float(wait_seconds)))
+        except (TypeError, ValueError, OverflowError):
+            wait_seconds = 60
+
+        # Check for VNC URL first
+        vnc_url = state.client.get_vnc_url(host="localhost", port=56080)
+        # Build initial message with VNC URL
+        initial_message = f"⏸️  **Browser Pause Requested**\n\n{message}\n\n"
+        if vnc_url:
+            initial_message += f"🌐 **Control Browser**: {vnc_url}\n\n"
+            initial_message += "Click the link above to access the browser and complete the manual task.\n"
+            initial_message += f"⏱️  Waiting up to {wait_seconds} seconds for you to complete the task...\n\n"
+            initial_message += "The browser control panel should open automatically in the web interface."
+        else:
+            initial_message += "⚠️  VNC is not available. Browser should be visible on your display.\n"
+            initial_message += f"⏱️  Waiting up to {wait_seconds} seconds..."
+
+        # Update log with initial message so frontend can show browser panel
+        self.log.update(message=initial_message)
+        # Call the client to mark the pause (returns immediately now)
+        action = Action(
+            action_type=ActionType.PAUSE_FOR_USER,
+            value=str(wait_seconds),
+            metadata={"message": message}
+        )
+        result = await state.client.execute_action(action)
+        if not result.success:
+            return f"Pause failed: {result.error}"
+
+        # Now actually pause/wait at the Agent level
+        PrintStyle().info(f"Browser paused for user interaction. Waiting {wait_seconds} seconds...")
+        try:
+            await asyncio.sleep(wait_seconds)
+        except asyncio.CancelledError:
+            return "Browser pause interrupted. Resuming agent execution."
+        page = state.client.page  # may be unset; _observe_page guards for this too
+        current_url = page.url if page else "unknown"
+        return f"✅ Browser pause completed. Resuming agent execution.\n\nCurrent page: {current_url}"
+
+    async def _get_browser_info(self) -> str:
+        """
+        Build a plain-text diagnostic report: configured headless flag, start
+        URL and timeout, plus live session details and visibility tips.
+        """
+        state = await self._get_state()
+        client = state.client if state else None
+        cfg = self.agent.config
+        report = [
+            "=== Browser Configuration ===",
+            f"Config headless mode: {cfg.browser_control_headless}",
+            f"Config start URL: {cfg.browser_control_start_url}",
+            f"Config timeout: {cfg.browser_control_timeout}ms",
+            "",
+            "=== Browser State ===",
+        ]
+        if not client:
+            report += [
+                "Browser not initialized yet",
+                "First use of browser_control will initialize the browser",
+            ]
+            return "\n".join(report)
+
+        report += ["Browser initialized: Yes", f"Browser headless mode: {client.headless}"]
+        if client.page:
+            report.append(f"Current URL: {client.page.url}")
+            report.append(f"Page title: {await client.page.title()}")
+        report += ["", "=== Visibility Status ==="]
+        if client.headless:
+            report += [
+                "⚠️  Browser is running in HEADLESS mode (invisible)",
+                "",
+                "To see the browser window:",
+                "1. Close current browser session with reset=true",
+                "2. Set browser_control_headless=False in agent.py",
+                "3. Restart the agent",
+                "4. Or use: browser_control:navigate with reset='true'",
+            ]
+        else:
+            report += [
+                "✓ Browser is running in VISIBLE mode",
+                "  A browser window should be visible on your screen",
+                "",
+                "If you don't see the window:",
+                "- Check if it opened on another desktop/display",
+                "- Look for Chrome in your taskbar/dock",
+                "- Try alt-tabbing (Windows) or Command-Tab (Mac)",
+                "- The window may be minimized or behind other windows",
+            ]
+        return "\n".join(report)
+
+    async def _capture_screenshot(self, method: str):
+        """
+        Save a viewport screenshot for the UI and attach it to the tool log.
+
+        Best effort: any failure is downgraded to a warning so the tool call
+        itself still succeeds. `method` is accepted but not used here.
+        """
+        try:
+            state = await self._get_state()
+            page = state.client.page if state and state.client else None
+            if not page:
+                return
+
+            # <chat folder>/browser_control/screenshots/<guid>.png
+            chat_dir = persist_chat.get_chat_folder_path(self.agent.context.id)
+            target = files.get_abs_path(
+                chat_dir, "browser_control", "screenshots", f"{self.guid}.png"
+            )
+            files.make_dirs(target)
+
+            await page.screenshot(
+                path=target,
+                full_page=False,  # viewport only, not full page
+                timeout=self.agent.config.browser_control_timeout,
+            )
+
+            # Update log with img:// protocol for UI display
+            stamp = str(time.time())
+            self.log.update(screenshot=f"img://{target}&t={stamp}")
+
+        except Exception as e:
+            # Don't fail the tool execution if screenshot capture fails
+            PrintStyle().warning(f"Could not capture screenshot: {e}")
+    
+    def get_log_object(self):
+        """Create this call's "tool" log entry; the heading names the
+        method when one is set."""
+        heading = (
+            f"icon://web {self.agent.agent_name}: Using browser_control:{self.method}"
+            if self.method
+            else f"icon://web {self.agent.agent_name}: Using browser_control"
+        )
+        log = self.agent.context.log
+        return log.log(
+            type="tool", heading=heading, content="", kvps=self.args
+        )
+
diff --git a/requirements.txt b/requirements.txt
index f0391d266a..a9495ab8da 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 a2wsgi==1.10.8
 ansio==0.0.1
-browser-use==0.5.11
 docker==7.1.0
 duckduckgo-search==6.1.12
 faiss-cpu==1.11.0
@@ -19,7 +18,7 @@ langchain-unstructured[all-docs]==0.1.6
 openai-whisper==20240930
 lxml_html_clean==0.3.1
 markdown==3.7
-mcp==1.13.1
+mcp>=1.13.1
 newspaper3k==0.2.8
 paramiko==3.5.0
 playwright==1.52.0
diff --git a/webui/components/browser-control/browser-control-icons.html b/webui/components/browser-control/browser-control-icons.html
new file mode 100644
index 0000000000..5069a902c2
--- /dev/null
+++ b/webui/components/browser-control/browser-control-icons.html
@@ -0,0 +1,44 @@
+<html>
+<head>
+    <title>Browser Control Toggle Icon</title>
+    <script type="module">
+        import { store } from "/components/browser-control/browser-control-store.js";
+    </script>
+</head>
+<body>
+    <div x-data>
+        <template x-if="$store.browserControl">
+            <!-- Browser Control Toggle Button -->
+            <div class="notification-toggle"
+                 :class="{
+                     'has-notifications': $store.browserControl.vncReady
+                 }"
+                 @click="$store.browserControl.isVisible ? $store.browserControl.hide() : $store.browserControl.show()"
+                 title="Toggle Browser Control">
+                <div class="notification-icon">
+                    <span class="material-symbols-outlined">web</span>
+                </div>
+                <!-- Show indicator when VNC is ready -->
+                <span x-show="$store.browserControl.vncReady && !$store.browserControl.isVisible"
+                      class="browser-ready-indicator"
+                      title="Browser control available"></span>
+            </div>
+        </template>
+    </div>
+
+    <style>
+        /* Browser ready indicator - small blue dot */
+        .browser-ready-indicator {
+            position: absolute;
+            bottom: 2px;
+            right: 2px;
+            width: 8px;
+            height: 8px;
+            background: #4a9eff;
+            border-radius: 50%;
+            border: 2px solid var(--color-panel);
+            z-index: 1;
+        }
+    </style>
+</body>
+</html>
diff --git a/webui/components/browser-control/browser-control-store.js b/webui/components/browser-control/browser-control-store.js
new file mode 100644
index 0000000000..0c80107a9f
--- /dev/null
+++ b/webui/components/browser-control/browser-control-store.js
@@ -0,0 +1,65 @@
+import { createStore } from "/js/AlpineStore.js";
+
+const model = {
+    isVisible: false,
+    isMinimized: false,
+    isMaximized: false,
+    vncUrl: '', // noVNC URL reported by the backend or passed to show()
+    vncReady: false,
+    _checkInterval: null, // handle of the availability polling timer
+
+    // Check once, then poll for VNC availability every 3 seconds.
+    init() {
+        this.cleanup(); // never stack a second timer if init() runs again
+        this.checkVncAvailability();
+        this._checkInterval = setInterval(() => this.checkVncAvailability(), 3000);
+    },
+
+    // Ask the backend whether VNC is up; on any failure mark it unavailable.
+    async checkVncAvailability() {
+        try {
+            const response = await fetch('/browser_control?action=info');
+            // An HTTP error is not a status report, so don't try to parse it
+            if (!response.ok) throw new Error(`HTTP ${response.status}`);
+            const data = await response.json();
+            this.vncReady = Boolean(data.vnc_ready);
+            if (this.vncReady && data.novnc_url) {
+                this.vncUrl = data.novnc_url;
+            }
+        } catch (error) {
+            console.log('VNC not available:', error);
+            this.vncReady = false;
+        }
+    },
+
+    // Open the panel (un-minimized), optionally pointing it at `url`.
+    show(url = null) {
+        if (url) this.vncUrl = url;
+        this.isVisible = true;
+        this.isMinimized = false;
+    },
+
+    hide() {
+        this.isVisible = false;
+    },
+
+    cleanup() {
+        if (this._checkInterval) {
+            clearInterval(this._checkInterval);
+            this._checkInterval = null;
+        }
+    },
+
+    toggleMinimize() {
+        this.isMinimized = !this.isMinimized;
+        if (this.isMinimized) this.isMaximized = false;
+    },
+
+    toggleMaximize() {
+        this.isMaximized = !this.isMaximized;
+    }
+};
+
+// Create and export the store
+const store = createStore("browserControl", model);
+export { store };
diff --git a/webui/components/browser-control/browser-panel.html b/webui/components/browser-control/browser-panel.html
new file mode 100644
index 0000000000..0d05e30fd6
--- /dev/null
+++ b/webui/components/browser-control/browser-panel.html
@@ -0,0 +1,373 @@
+<html>
+<head>
+    <title>Browser Control Panel</title>
+    <script type="module">
+        import { store } from "/components/browser-control/browser-control-store.js";
+    </script>
+</head>
+<body>
+    <div x-data>
+        <template x-if="$store.browserControl">
+            <div x-data="browserPanel()"
+                 x-show="$store.browserControl.isVisible"
+                 x-cloak
+                 class="browser-panel"
+                 :class="{
+                     'minimized': $store.browserControl.isMinimized,
+                     'maximized': $store.browserControl.isMaximized,
+                     'dragging': isDragging
+                 }"
+                 :style="{
+                     left: panelX !== null ? panelX + 'px' : '50%',
+                     top: panelY !== null ? panelY + 'px' : '50%',
+                     transform: panelX === null ? 'translate(-50%, -50%)' : 'none',
+                     transition: isDragging ? 'none' : ''
+                 }"
+                 x-init="initPosition()">
+
+        <!-- Panel Header -->
+        <div class="browser-panel-header" @mousedown="startDrag($event)">
+            <div class="browser-panel-title">
+                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                    <rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect>
+                    <line x1="9" y1="3" x2="9" y2="21"></line>
+                </svg>
+                <span>Browser Control</span>
+            </div>
+            <div class="browser-panel-controls">
+                <button @click="$store.browserControl.toggleMinimize()" title="Minimize" class="browser-panel-btn">
+                    <span x-show="!$store.browserControl.isMinimized">−</span>
+                    <span x-show="$store.browserControl.isMinimized">□</span>
+                </button>
+                <button @click="$store.browserControl.toggleMaximize()"
+                        title="Maximize"
+                        class="browser-panel-btn"
+                        x-show="!$store.browserControl.isMinimized">
+                    <span x-show="!$store.browserControl.isMaximized">⛶</span>
+                    <span x-show="$store.browserControl.isMaximized">⧉</span>
+                </button>
+                <button @click="$store.browserControl.hide()" title="Close" class="browser-panel-btn browser-panel-close">×</button>
+            </div>
+        </div>
+
+        <!-- Panel Content -->
+        <div class="browser-panel-content" x-show="!$store.browserControl.isMinimized">
+            <!-- Bind about:blank until a VNC URL exists; an empty vncUrl would otherwise load the bogus relative URL "&reconnect=..." -->
+            <iframe
+                x-show="$store.browserControl.vncUrl"
+                :src="$store.browserControl.vncUrl ? $store.browserControl.vncUrl + '&reconnect=true&quality=9&show_dot=true' : 'about:blank'"
+                class="browser-iframe"
+                frameborder="0"
+                allow="fullscreen" allowfullscreen
+            ></iframe>
+            <div x-show="!$store.browserControl.vncUrl" class="browser-loading">
+                <div class="browser-loading-spinner"></div>
+                <p>Connecting to browser...</p>
+            </div>
+        </div>
+            </div>
+        </template>
+    </div>
+
+    <script>
+        function browserPanel() { // x-data factory for the panel element: holds position and drag state
+            return {
+                isDragging: false, // true between a left-button mousedown on the header and the next mouseup
+                panelX: null, // left offset in px; null makes the :style binding fall back to 50% + translate centering
+                panelY: null, // top offset in px; null makes the :style binding fall back to 50%
+                dragStart: { x: 0, y: 0 }, // pointer position minus (panelX, panelY), captured at drag start
+
+                initPosition() {
+                    // Re-center every time the panel becomes visible, unless it is maximized
+                    this.$watch('$store.browserControl.isVisible', (isVisible) => {
+                        if (isVisible && !this.$store.browserControl.isMaximized) {
+                            this.recenterPanel();
+                        }
+                    });
+
+                    // Center immediately if the panel is already visible when this runs
+                    if (this.$store.browserControl.isVisible) {
+                        this.recenterPanel();
+                    }
+                },
+
+                recenterPanel() {
+                    // Reset position to null so the :style binding applies its 50% / translate centering
+                    this.panelX = null;
+                    this.panelY = null;
+
+                    const panel = this.$el;
+                    // Measure after two animation frames so layout is complete, then pin explicit pixel coordinates
+                    requestAnimationFrame(() => {
+                        requestAnimationFrame(() => {
+                            const width = panel.offsetWidth;
+                            const height = panel.offsetHeight;
+                            this.panelX = Math.max(0, (window.innerWidth - width) / 2);
+                            this.panelY = Math.max(0, (window.innerHeight - height) / 2);
+                        });
+                    });
+                },
+
+                startDrag(event) {
+                    // Only handle left mouse button
+                    if (event.button !== 0) return;
+
+                    event.preventDefault();
+                    event.stopPropagation();
+
+                    if (this.$store.browserControl.isMaximized) return; // a maximized panel is not draggable
+
+                    this.isDragging = true;
+                    const panel = this.$el;
+
+                    this.dragStart.x = event.clientX - this.panelX;
+                    this.dragStart.y = event.clientY - this.panelY;
+
+                    // Disable iframe pointer events for the drag (presumably so the iframe cannot swallow mouse events); restored in onMouseUp
+                    const iframe = panel.querySelector('.browser-iframe');
+                    if (iframe) iframe.style.pointerEvents = 'none';
+
+                    // Prevent text selection while dragging; restored in onMouseUp
+                    document.body.style.userSelect = 'none';
+
+                    const onMouseMove = (e) => {
+                        if (!this.isDragging) return;
+                        e.preventDefault();
+
+                        const x = e.clientX - this.dragStart.x;
+                        const y = e.clientY - this.dragStart.y;
+
+                        // Keep at least 100px of the panel reachable left/right and at the bottom; never above the top edge
+                        const minVisible = 100;
+                        const maxX = window.innerWidth - minVisible;
+                        const maxY = window.innerHeight - minVisible;
+                        const minX = minVisible - panel.offsetWidth;
+
+                        this.panelX = Math.max(minX, Math.min(x, maxX));
+                        this.panelY = Math.max(0, Math.min(y, maxY));
+                    };
+
+                    const onMouseUp = () => { // ends the drag and undoes everything startDrag set up
+                        this.isDragging = false;
+                        document.removeEventListener('mousemove', onMouseMove);
+                        document.removeEventListener('mouseup', onMouseUp);
+                        document.body.style.userSelect = '';
+                        if (iframe) iframe.style.pointerEvents = '';
+                    };
+
+                    document.addEventListener('mousemove', onMouseMove); // on document so the drag continues when the pointer leaves the header
+                    document.addEventListener('mouseup', onMouseUp);
+                }
+            };
+        }
+    </script>
+
+    <script>
+        document.addEventListener('alpine:init', function initBrowserControlStore() { // NOTE(review): Alpine also runs a store's init() on registration; confirm this call cannot start a second poll
+            const browserStore = Alpine.store('browserControl');
+            if (browserStore && typeof browserStore.init === 'function') { browserStore.init(); }
+        });
+    </script>
+
+    <style>
+        /* Browser Control Panel Styles */
+        .browser-panel {
+            position: fixed;
+            z-index: 15000;
+            background: var(--color-panel);
+            border: 1px solid var(--color-border);
+            border-radius: 8px;
+            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
+            display: flex;
+            flex-direction: column;
+            overflow: hidden;
+            transition: all 0.3s ease;
+            /* Increased size to better accommodate 1920x1080 VNC display */
+            width: 800px;
+            height: 800px;
+        }
+
+        .browser-panel.minimized {
+            width: 300px;
+            height: 40px;
+            bottom: 20px;
+            top: auto !important;  /* must outrank the inline top from the panel's :style binding, or bottom is ignored */
+            left: auto !important; /* must outrank the inline left from the panel's :style binding, or right is ignored */
+            right: 20px;
+        }
+
+        .browser-panel.maximized {
+            width: calc(100vw - 40px) !important;
+            height: calc(100vh - 40px) !important;
+            top: 20px !important;
+            left: 20px !important;
+            transform: none !important;
+        }
+
+        /* Panel Header */
+        .browser-panel-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            padding: 8px 12px;
+            background: var(--color-panel);
+            border-bottom: 1px solid var(--color-border);
+            cursor: move;
+            user-select: none;
+        }
+
+        .browser-panel-title {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            font-weight: 600;
+            font-size: 14px;
+            color: var(--color-primary);
+        }
+
+        .browser-panel-title svg {
+            color: var(--color-accent, #4a9eff);
+        }
+
+        .browser-panel-controls {
+            display: flex;
+            gap: 4px;
+        }
+
+        .browser-panel-btn {
+            width: 32px;
+            height: 32px;
+            border: none;
+            background: transparent;
+            color: var(--color-text);
+            font-size: 18px;
+            border-radius: 4px;
+            cursor: pointer;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: all 0.2s;
+            opacity: 0.7;
+        }
+
+        .browser-panel-btn:hover {
+            background: var(--color-panel-hover, rgba(255, 255, 255, 0.1));
+            color: var(--color-primary);
+            opacity: 1;
+        }
+
+        .browser-panel-close:hover {
+            background: #e74c3c;
+            color: white;
+        }
+
+        /* Panel Content */
+        .browser-panel-content {
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            overflow: hidden;
+            position: relative;
+            background: #1a1a1a;
+        }
+
+        .browser-iframe {
+            width: 100%;
+            height: 100%;
+            border: none;
+        }
+
+        /* Loading State */
+        .browser-loading {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+            height: 100%;
+            color: var(--color-text);
+        }
+
+        .browser-loading-spinner {
+            width: 40px;
+            height: 40px;
+            border: 4px solid var(--color-border);
+            border-top-color: var(--color-accent, #4a9eff);
+            border-radius: 50%;
+            animation: spin 0.8s linear infinite;
+            margin-bottom: 16px;
+        }
+
+        @keyframes spin {
+            to { transform: rotate(360deg); }
+        }
+
+        /* Hide when x-cloak is present */
+        [x-cloak] {
+            display: none !important;
+        }
+
+        /* Responsive */
+        /* Tablets (landscape and portrait) */
+        @media (max-width: 1024px) {
+            .browser-panel:not(.maximized):not(.minimized) {
+                width: 700px;
+                height: 600px;
+            }
+        }
+
+        /* Mobile devices and small tablets */
+        @media (max-width: 768px) {
+            .browser-panel:not(.maximized):not(.minimized) {
+                width: calc(100vw - 20px);
+                height: calc(100vh - 100px);
+                left: 10px !important;
+                top: 50px !important;
+            }
+
+            .browser-panel.minimized {
+                width: 250px;
+                right: 10px;
+                bottom: 10px;
+            }
+
+            .browser-panel-header {
+                cursor: default;
+                -webkit-touch-callout: none;
+            }
+
+            .browser-panel-title {
+                font-size: 13px;
+            }
+        }
+
+        /* Extra small devices */
+        @media (max-width: 480px) {
+            .browser-panel:not(.maximized):not(.minimized) {
+                width: calc(100vw - 10px);
+                height: calc(100vh - 80px);
+                left: 5px !important;
+                top: 40px !important;
+            }
+
+            .browser-panel.minimized {
+                width: 200px;
+            }
+
+            .browser-panel-btn {
+                width: 28px;
+                height: 28px;
+                font-size: 16px;
+            }
+        }
+
+        /* Accessibility */
+        @media (prefers-reduced-motion: reduce) {
+            .browser-panel {
+                transition: none;
+            }
+        }
+
+
+    </style>
+</body>
+</html>
diff --git a/webui/components/chat/top-section/chat-top-store.js b/webui/components/chat/top-section/chat-top-store.js
index 8dd47b000d..7e0752c8eb 100644
--- a/webui/components/chat/top-section/chat-top-store.js
+++ b/webui/components/chat/top-section/chat-top-store.js
@@ -2,7 +2,8 @@ import { createStore } from "/js/AlpineStore.js";
 
 // define the model object holding data and functions
 const model = {
-  connected: false,
+  connected: false, // Drives the status icon (green circle when true). NOTE(review): poll() in index.js calls setConnectionStatus(true) on every successful poll; confirm this means "backend reachable" rather than "agent processing"
+  backendAlive: true, // Tracks backend connection health
 };
 
 // convert it to alpine store
diff --git a/webui/components/chat/top-section/chat-top.html b/webui/components/chat/top-section/chat-top.html
index 179495d6cc..e766146533 100644
--- a/webui/components/chat/top-section/chat-top.html
+++ b/webui/components/chat/top-section/chat-top.html
@@ -20,17 +20,19 @@
                 <div id="time-date"></div>
                 <div class="status-icon">
                     <svg viewBox="0 0 30 30">
-                        <!-- Connected State (filled circle) -->
+                        <!-- Active State (filled green circle) -->
                         <circle class="connected-circle" cx="15" cy="15" r="8"
                             x-bind:fill="$store.chatTop.connected ? '#00c340' : 'none'" x-bind:opacity="$store.chatTop.connected ? 1 : 0" />
 
-                        <!-- Disconnected State (outline circle) -->
-                        <circle class="disconnected-circle" cx="15" cy="15" r="12" fill="none" stroke="#e40138"
-                            stroke-width="3" x-bind:opacity="$store.chatTop.connected ? 0 : 1" />
+                        <!-- Idle State (red outline circle, not filled) -->
+                        <circle class="disconnected-circle" cx="15" cy="15" r="8" fill="none" stroke="#e40138" stroke-width="3"
+                            x-bind:opacity="$store.chatTop.connected ? 0 : 1" />
                     </svg>
                 </div>
                 <!-- Notification Toggle positioned next to time-date -->
                 <x-component path="notifications/notification-icons.html"></x-component>
+                <!-- Browser Control Toggle -->
+                <x-component path="browser-control/browser-control-icons.html"></x-component>
                 <!-- Project Selector -->
                 <x-component path="projects/project-selector.html"></x-component>
             </div>
diff --git a/webui/index.html b/webui/index.html
index 7b62b4f2bf..d2252f3a49 100644
--- a/webui/index.html
+++ b/webui/index.html
@@ -99,6 +99,9 @@
 
     <script type="module" src="index.js"></script>
 
+    <!-- Browser Control Integration -->
+    <script type="module" src="js/browser-control-integration.js"></script>
+
     <!-- Bootstrap JS (only for logic, importing bundled CSS => UI conflicts) -->
     <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
         integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
@@ -1184,6 +1187,9 @@ <h2>Task Management</h2>
     <!-- Drag and Drop Overlay Component -->
     <x-component path="chat/attachments/dragDropOverlay.html"></x-component>
 
+    <!-- Browser Control Panel Component -->
+    <x-component path="browser-control/browser-panel.html"></x-component>
+
     <!-- Register Service Worker for offline support and caching -->
     <script>
         if ('serviceWorker' in navigator) {
diff --git a/webui/index.js b/webui/index.js
index d7db5fc6af..180e63de09 100644
--- a/webui/index.js
+++ b/webui/index.js
@@ -325,9 +325,12 @@ export async function poll() {
     //set ui model vars from backend
     inputStore.paused = response.paused;
 
-    // Update status icon state
+    // Update status icon to show backend connection (green when connected)
     setConnectionStatus(true);
 
+    // Track backend health
+    chatTopStore.backendAlive = true;
+
     // Update chats list using store
     let contexts = response.contexts || [];
     chatsStore.applyContexts(contexts);
@@ -383,6 +386,7 @@ export async function poll() {
   } catch (error) {
     console.error("Error:", error);
     setConnectionStatus(false);
+    chatTopStore.backendAlive = false;
   }
 
   return updated;
diff --git a/webui/js/browser-control-integration.js b/webui/js/browser-control-integration.js
new file mode 100644
index 0000000000..7b3c37fe40
--- /dev/null
+++ b/webui/js/browser-control-integration.js
@@ -0,0 +1,35 @@
+/**
+ * Browser Control Integration
+ *
+ * Provides global helper functions for browser control.
+ * Users can manually open the panel by clicking the browser control icon.
+ */
+
+// Log once Alpine has finished initializing (diagnostic only; nothing is set up here)
+document.addEventListener('alpine:initialized', function logBrowserControlReady() {
+    console.log('Browser Control Integration initialized');
+});
+
+// Global helper functions - can be called from anywhere in the app or console
+window.showBrowserControl = function(url) {
+    // typeof guard: tolerate calls made before the Alpine global exists
+    const browserStore = typeof Alpine !== 'undefined' ? Alpine.store('browserControl') : null;
+    if (browserStore) browserStore.show(url);
+};
+
+window.hideBrowserControl = function() {
+    // typeof guard: tolerate calls made before the Alpine global exists
+    const browserStore = typeof Alpine !== 'undefined' ? Alpine.store('browserControl') : null;
+    if (browserStore) browserStore.hide();
+};
+
+window.toggleBrowserControl = function() {
+    // typeof guard: tolerate calls made before the Alpine global exists
+    const browserStore = typeof Alpine !== 'undefined' ? Alpine.store('browserControl') : null;
+    if (!browserStore) return;
+    if (browserStore.isVisible) {
+        browserStore.hide();
+    } else {
+        browserStore.show();
+    }
+};