diff --git a/.dockerignore b/.dockerignore index 514136ac911..63599788338 100644 --- a/.dockerignore +++ b/.dockerignore @@ -80,7 +80,6 @@ src/node_modules !webview-ui/ !packages/evals/.docker/entrypoints/runner.sh !packages/build/ -!packages/cloud/ !packages/config-eslint/ !packages/config-typescript/ !packages/evals/ diff --git a/.env.sample b/.env.sample index d89ef727920..aebe5cca44a 100644 --- a/.env.sample +++ b/.env.sample @@ -3,3 +3,4 @@ POSTHOG_API_KEY=key-goes-here # Roo Code Cloud / Local Development CLERK_BASE_URL=https://epic-chamois-85.clerk.accounts.dev ROO_CODE_API_URL=http://localhost:3000 +ROO_CODE_PROVIDER_URL=http://localhost:8080/proxy/v1 diff --git a/.gitattributes b/.gitattributes index 284eab4f98a..e9e36432cdc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,6 +6,11 @@ src/assets/docs/demo.gif filter=lfs diff=lfs merge=lfs -text *.snap linguist-generated=true # Non-English translation files - mark as linguist-generated to exclude from GitHub language statistics +# Package NLS files - mark non-English ones as generated +src/package.nls.*.json linguist-generated=true +# Exclude the base English file from being marked as generated +src/package.nls.json linguist-generated=false + # Root locales directory (contains only non-English translations) locales/** linguist-generated=true diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 03bbe9640a8..443842c856f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,12 +1,66 @@ name: Bug Report -description: Clearly report a bug with detailed repro steps +description: Report a broken behavior in plain language with a minimal reproduction labels: ["bug"] +title: "[BUG] " body: - type: markdown attributes: value: | - **Thanks for your report!** Please check existing issues first: - 👉 https://github.com/RooCodeInc/Roo-Code/issues + Thank you for your report! 
Please search existing issues first: + https://github.com/RooCodeInc/Roo-Code/issues + + - type: textarea + id: problem + attributes: + label: Problem (one or two sentences) + description: Describe what went wrong in plain language. + placeholder: 'Example: "Expected the task to start, but nothing happened and no message appeared."' + validations: + required: true + + - type: textarea + id: context + attributes: + label: Context (who is affected and when) + description: Who sees this and in what situation? Keep it non-technical. + placeholder: 'Example: "Happens to new users when starting a run from the New Run page with dark theme enabled."' + validations: + required: true + + - type: textarea + id: steps + attributes: + label: Reproduction steps + description: Provide clear, numbered steps so we can reproduce. + placeholder: | + 1) Environment/setup (OS, extension version, relevant settings) + 2) Exact actions (clicks, inputs, commands) + 3) What you observed after each step + validations: + required: true + + - type: input + id: expected + attributes: + label: Expected result + placeholder: e.g., "The task starts and shows progress." + validations: + required: true + + - type: input + id: actual + attributes: + label: Actual result + placeholder: e.g., "The button appears disabled and no progress is shown." + validations: + required: true + + - type: textarea + id: variations + attributes: + label: Variations tried (optional) + description: Different browsers, devices, providers, or settings you tried. + placeholder: e.g., "Tried Chrome/Firefox, disabling dark theme, switching providers." 
- type: input id: version @@ -19,17 +73,17 @@ body: - type: dropdown id: provider attributes: - label: API Provider + label: API Provider (optional) options: - Anthropic - - AWS Bedrock + - Amazon Bedrock - Chutes AI - DeepSeek - - Glama + - Featherless AI + - Fireworks AI - Google Gemini - Google Vertex AI - Groq - - Human Relay Provider - LiteLLM - LM Studio - Mistral AI @@ -43,57 +97,23 @@ body: - VS Code Language Model API - xAI (Grok) - Not Applicable / Other - validations: - required: true - type: input id: model attributes: - label: Model Used + label: Model Used (optional) description: Exact model name (e.g., Claude 3.7 Sonnet). Use N/A if irrelevant. - validations: - required: true - type: textarea id: roo-code-tasks attributes: - label: Roo Code Task Links (Optional) - description: | - If you have any publicly shared task links that demonstrate the issue, please paste them here. - This helps maintainers understand the context. - Example: https://app.roocode.com/share/task-id + label: Roo Code Task Links (optional) + description: If you have any publicly shared Roo Code task links that demonstrate the issue, paste them here. placeholder: Paste your Roo Code share links here, one per line - - type: textarea - id: steps - attributes: - label: 🔁 Steps to Reproduce - description: | - Help us see what you saw. Give clear, numbered steps: - - 1. Setup (OS, extension version, settings) - 2. Exact actions (clicks, input, files, commands) - 3. What happened after each step - - Think like you're writing a recipe. Without this, we can't reproduce the issue. - validations: - required: true - - - type: textarea - id: what-happened - attributes: - label: 💥 Outcome Summary - description: | - Recap what went wrong in one or two lines. - - Example: "Expected code to run, but got an empty response and no error." - placeholder: Expected ___, but got ___. 
- validations: - required: true - - type: textarea id: logs attributes: - label: 📄 Relevant Logs or Errors (Optional) - description: Paste API logs, terminal output, or errors here. Use triple backticks (```) for code formatting. + label: Relevant logs or errors (optional) + description: Paste relevant output or errors. Use triple backticks (```) for formatting. render: shell diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 4863f9ffa61..7e140ec08cc 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -1,71 +1,47 @@ -name: Detailed Feature Proposal -description: Report a specific problem that needs solving in Roo Code -labels: ["proposal", "enhancement"] +name: Enhancement Request +description: Propose an improvement in plain language focused on user benefit +labels: ["enhancement"] +title: "[ENHANCEMENT] " body: - type: markdown attributes: value: | - **Thank you for submitting a feature request for Roo Code!** - - This template helps you describe problems that need solving. Focus on the problem - the Roo team will work to design solutions unless you want to contribute the implementation yourself. - - **Quality over speed:** We prefer detailed, clear problem descriptions over quick ones. Vague requests often get closed or require multiple rounds of clarification, which wastes everyone's time. - - **Before submitting:** - - Search existing [Issues](https://github.com/RooCodeInc/Roo-Code/issues) and [Discussions](https://github.com/RooCodeInc/Roo-Code/discussions) to avoid duplicates - - For general ideas, use [GitHub Discussions](https://github.com/RooCodeInc/Roo-Code/discussions/categories/feature-requests) instead of this template. + Thank you for helping improve Roo Code! + Please focus on the problem and the desired behavior in plain language. 
- - type: markdown + - type: textarea + id: problem attributes: - value: | - ## ❌ Common mistakes that lead to request rejection: - - **Vague problem descriptions:** "UI is bad" -> Should be: "Submit button is invisible on dark theme" - - **Missing user impact:** "This would be cool" -> Should explain who benefits and how - - **No specific context:** Describe exactly when and how the problem occurs - + label: Problem (one or two sentences) + description: What problem are users facing? + placeholder: e.g., "Users often click Copy Run by mistake and duplicate runs unintentionally." + validations: + required: true - type: textarea - id: problem-description + id: context attributes: - label: What specific problem does this solve? - description: | - **Be concrete and detailed.** Explain the problem from a user's perspective. - - ✅ **Good examples (specific, clear impact):** - - "When running large tasks, users wait 5+ minutes because tasks execute sequentially instead of in parallel, blocking productivity" - - "AI can only read one file per request, forcing users to make multiple requests for multi-file projects, increasing wait time from 30s to 5+ minutes" - - "Dark theme users can't see the submit button because it uses white text on light grey background" - - ❌ **Poor examples (vague, unclear impact):** - - "The UI looks weird" -> What specifically looks weird? On which screen? What's the impact? - - "System prompt is not good" -> What's wrong with it? What behaviour does it cause? What should it do instead? - - "Performance could be better" -> Where? How slow is it currently? What's the user impact? - - **Your problem description should answer:** - - Who is affected? (all users, specific user types, etc.) - - When does this happen? (specific scenarios/steps) - - What's the current behaviour vs expected behaviour? - - What's the impact? (time wasted, errors caused, etc.) - placeholder: Be specific about the problem, who it affects, and the impact. 
Avoid generic statements like "it's slow" or "it's confusing." + label: Context (who is affected and when) + description: Who encounters this and in what situation? + placeholder: e.g., "Happens when browsing the Runs list; most visible for new users." validations: required: true - - type: textarea - id: additional-context + id: desired attributes: - label: Additional context (optional) - description: Mockups, screenshots, links, user quotes, or other relevant information that supports your proposal. + label: Desired behavior (conceptual, not technical) + description: Describe what should happen in simple terms. + placeholder: e.g., "Ask for confirmation before copying a run." + validations: + required: true - type: textarea - id: roo-code-tasks + id: constraints attributes: - label: Roo Code Task Links (Optional) - description: | - If you used Roo Code to explore this feature request or develop solutions, share the public task links here. - This helps maintainers understand the context and any exploration you've done. - Example: https://app.roocode.com/share/task-id - placeholder: Paste your Roo Code share links here, one per line + label: Constraints / preferences (optional) + description: Any considerations like performance, accessibility, or UX expectations. + placeholder: e.g., "Keep it quick and unobtrusive; keyboard accessible." - type: checkboxes id: checklist @@ -74,128 +50,42 @@ body: options: - label: I've searched existing Issues and Discussions for duplicates required: true - - label: This describes a specific problem with clear impact and context + - label: This describes a specific problem with clear context and impact required: true + - type: textarea + id: roo-code-tasks + attributes: + label: Roo Code Task Links (optional) + description: If you explored this with Roo Code, share public task links for context. 
+ placeholder: Paste your Roo Code share links here, one per line + - type: markdown attributes: value: | --- - - ## 🛠️ **Optional: Contributing & Technical Analysis** - - **🎯 Just reporting a problem?** You can click "Submit new issue" right now! The sections below are only needed if you want to contribute a solution via pull request. - - **⚠️ Only continue if you want to:** - - Propose a specific solution design - - Implement the feature yourself via pull request - - Provide technical analysis to help with implementation - - **For contributors who continue:** - - A maintainer (especially @hannesrudolph) will review this proposal. **Do not start implementation until approved and assigned.** We're a small team with limited resources, so every code addition needs careful consideration. We're always happy to receive clear, actionable proposals though! - - Join [Discord](https://discord.gg/roocode) and DM **Hannes Rudolph** (`hrudolph`) for guidance on implementation - - Check our [Roadmap](https://github.com/orgs/RooCodeInc/projects/1/views/1?query=sort%3Aupdated-desc+is%3Aopen&filterQuery=is%3Aissue%2Copen%2Cclosed+label%3A%22feature+request%22+status%3A%22Issue+%5BUnassigned%5D%22%2C%22Issue+%5BIn+Progress%5D%22) to see open feature requests ready to be implemented or currently being worked on - - - type: checkboxes - id: willingness-to-contribute - attributes: - label: Interested in implementing this? - description: | - **Important:** If you check "Yes" below, the technical sections become REQUIRED. - We need detailed technical analysis from contributors to ensure quality implementation. 
- options: - - label: Yes, I'd like to help implement this feature - required: false - - - type: checkboxes - id: implementation-approval - attributes: - label: Implementation requirements - options: - - label: I understand this needs approval before implementation begins - required: false - - - type: textarea - id: proposed-solution - attributes: - label: How should this be solved? (REQUIRED if contributing, optional otherwise) - description: | - **If you want to implement this feature, this section is REQUIRED.** - - **Describe your solution in detail.** Explain not just what to build, but how it should work. - - ✅ **Good examples:** - - "Add parallel task execution: Allow up to 3 tasks to run simultaneously with a queue system for additional tasks. Show progress for each active task in the UI." - - "Enable multi-file AI processing: Modify the request handler to accept multiple files in a single request and process them together, reducing round trips." - - "Fix button contrast: Change submit button to use primary colour on dark theme (white text on blue background) instead of current grey." - - ❌ **Poor examples:** - - "Make it faster" -> How? What specific changes? - - "Improve the UI" -> Which part? What specific improvements? - - "Fix the prompt" -> What should the new prompt do differently? - - **Your solution should explain:** - - What exactly will change? - - How will users interact with it? - - What will the new behaviour look like? - placeholder: Describe the specific changes and how they will work. Include user interaction details if relevant. + Optional (for contributors): You can stop here if you're just proposing the improvement. - type: textarea id: acceptance-criteria attributes: - label: How will we know it works? 
(Acceptance Criteria - REQUIRED if contributing, optional otherwise) - description: | - **If you want to implement this feature, this section is REQUIRED.** - - **This is crucial - don't skip it.** Define what "working" looks like with specific, testable criteria. - - **Format suggestion:** - ``` - Given [context/situation] + label: Acceptance criteria (optional) + description: Define what “working” looks like with specific, testable outcomes. + placeholder: | + Given [context] When [user action] Then [expected result] And [additional expectations] But [what should NOT happen] - ``` - - **Example:** - ``` - Given I have 5 large tasks to run - When I start all of them - Then they execute in parallel (max 3 at once, can be configured) - And I see progress for each active task - And queued tasks show "waiting" status - But the UI doesn't freeze or become unresponsive - ``` - placeholder: | - Define specific, testable criteria. What should users be able to do? What should happen? What should NOT happen? - Use the Given/When/Then format above or your own clear structure. - type: textarea - id: technical-considerations + id: proposed-solution attributes: - label: Technical considerations (REQUIRED if contributing, optional otherwise) - description: | - **If you want to implement this feature, this section is REQUIRED.** - - Share technical insights that could help planning: - - Implementation approach or architecture changes - - Performance implications - - Compatibility concerns - - Systems that might be affected - - Potential blockers you can foresee - placeholder: e.g., "Will need to refactor task manager", "Could impact memory usage on large files", "Requires a large portion of code to be rewritten" + label: Proposed approach (optional) + description: If you have an idea, describe it briefly in plain language. 
- type: textarea - id: trade-offs-and-risks + id: risks attributes: - label: Trade-offs and risks (REQUIRED if contributing, optional otherwise) - description: | - **If you want to implement this feature, this section is REQUIRED.** - - What could go wrong or what alternatives did you consider? - - Alternative approaches and why you chose this one - - Potential negative impacts (performance, UX, etc.) - - Breaking changes or migration concerns - - Edge cases that need careful handling - placeholder: 'e.g., "Alternative: use library X but it is 500KB larger", "Risk: might slow older devices", "Breaking: changes API response format"' + label: Trade-offs / risks (optional) + description: Potential downsides or alternatives considered. diff --git a/.github/workflows/code-qa.yml b/.github/workflows/code-qa.yml index ba85a01b215..f8ac0c8642b 100644 --- a/.github/workflows/code-qa.yml +++ b/.github/workflows/code-qa.yml @@ -86,22 +86,38 @@ jobs: - name: Create .env.local file working-directory: apps/vscode-e2e run: echo "OPENROUTER_API_KEY=${{ secrets.OPENROUTER_API_KEY }}" > .env.local + - name: Set VS Code test version + run: echo "VSCODE_VERSION=1.101.2" >> $GITHUB_ENV + - name: Cache VS Code test runtime + uses: actions/cache@v4 + with: + path: apps/vscode-e2e/.vscode-test + key: ${{ runner.os }}-vscode-test-${{ env.VSCODE_VERSION }} + - name: Pre-download VS Code test runtime with retry + working-directory: apps/vscode-e2e + run: | + for attempt in 1 2 3; do + echo "Download attempt $attempt of 3..." 
+ node -e " + const { downloadAndUnzipVSCode } = require('@vscode/test-electron'); + downloadAndUnzipVSCode({ version: process.env.VSCODE_VERSION || '1.101.2' }) + .then(() => { + console.log('✅ VS Code test runtime downloaded successfully'); + process.exit(0); + }) + .catch(err => { + console.error('❌ Failed to download VS Code (attempt $attempt):', err); + process.exit(1); + }); + " && break || { + if [ $attempt -eq 3 ]; then + echo "All download attempts failed" + exit 1 + fi + echo "Retrying in 5 seconds..." + sleep 5 + } + done - name: Run integration tests working-directory: apps/vscode-e2e run: xvfb-run -a pnpm test:ci - - notify-slack-on-failure: - runs-on: ubuntu-latest - needs: [check-translations, knip, compile, unit-test, integration-test] - if: ${{ always() && github.event_name == 'push' && github.ref == 'refs/heads/main' && contains(needs.*.result, 'failure') }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Send Slack notification on failure - uses: ./.github/actions/slack-notify - with: - webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} - channel: "#ci" - workflow-name: "Code QA" - failed-jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/discord-pr-notify.yml b/.github/workflows/discord-pr-notify.yml deleted file mode 100644 index 88c918edfe3..00000000000 --- a/.github/workflows/discord-pr-notify.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Discord PR Notifier - -on: - workflow_dispatch: - pull_request_target: - types: [opened] - -jobs: - notify: - runs-on: ubuntu-latest - if: github.head_ref != 'changeset-release/main' - steps: - - name: Send Discord Notification - run: | - PAYLOAD=$(jq -n \ - --arg title "${{ github.event.pull_request.title }}" \ - --arg url "${{ github.event.pull_request.html_url }}" \ - --arg author "${{ github.event.pull_request.user.login }}" \ - '{ - content: ("🚀 **New PR:** " + $title + "\n🔗 <" + $url + ">\n👤 **Author:** " + $author), - thread_name: ($title + " by " + $author) - }') - - curl -X 
POST "${{ secrets.DISCORD_WEBHOOK }}" \ - -H "Content-Type: application/json" \ - -d "$PAYLOAD" diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml index c3c9327607b..5709bdc10a0 100644 --- a/.github/workflows/update-contributors.yml +++ b/.github/workflows/update-contributors.yml @@ -1,46 +1,67 @@ -name: Update Contributors +name: Update Contributors # Refresh contrib.rocks image cache on: - push: - branches: - - main workflow_dispatch: +permissions: + contents: write + pull-requests: write + jobs: - update-contributors: + refresh-contrib-cache: runs-on: ubuntu-latest - permissions: - contents: write # Needed for pushing changes. - pull-requests: write # Needed for creating PRs. steps: - - name: Checkout code + - name: Checkout uses: actions/checkout@v4 - - name: Setup Node.js and pnpm - uses: ./.github/actions/setup-node-pnpm - - name: Disable Husky + + - name: Bump cacheBust in all README files run: | - echo "HUSKY=0" >> $GITHUB_ENV - git config --global core.hooksPath /dev/null - - name: Update contributors and format + set -euo pipefail + TS="$(date +%s)" + # Target only the root README.md and localized READMEs under locales/*/README.md + mapfile -t FILES < <(git ls-files README.md 'locales/*/README.md' || true) + + if [ "${#FILES[@]}" -eq 0 ]; then + echo "No target README files found." >&2 + exit 1 + fi + + UPDATED=0 + for f in "${FILES[@]}"; do + if grep -q 'cacheBust=' "$f"; then + # In-place edit using GNU sed, which ubuntu-latest provides (this -i form is not portable to BSD/macOS sed) + sed -i -E "s/cacheBust=[0-9]+/cacheBust=${TS}/g" "$f" + echo "Updated cacheBust in $f" + UPDATED=1 + else + echo "Warning: cacheBust parameter not found in $f" >&2 + fi + done + + if [ "$UPDATED" -eq 0 ]; then + echo "No files were updated. Ensure READMEs embed contrib.rocks with cacheBust param." 
>&2 + exit 1 + fi + + - name: Detect changes + id: changes run: | - pnpm update-contributors - npx prettier --write README.md locales/*/README.md - if git diff --quiet; then echo "changes=false" >> $GITHUB_OUTPUT; else echo "changes=true" >> $GITHUB_OUTPUT; fi - id: check-changes - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + if git diff --quiet; then + echo "changed=false" >> $GITHUB_OUTPUT + else + echo "changed=true" >> $GITHUB_OUTPUT + fi + - name: Create Pull Request - if: steps.check-changes.outputs.changes == 'true' + if: steps.changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: token: ${{ secrets.GITHUB_TOKEN }} commit-message: "docs: update contributors list [skip ci]" committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>" - branch: update-contributors + branch: refresh-contrib-cache delete-branch: true - title: "Update contributors list" + title: "Refresh contrib.rocks image cache (all READMEs)" body: | - Automated update of contributors list and related files - - This PR was created automatically by a GitHub Action workflow and includes all changed files. + Automated refresh of the contrib.rocks image cache by bumping the cacheBust parameter in README.md and locales/*/README.md. 
base: main diff --git a/.gitignore b/.gitignore index 65c201c3c25..54cf66cee7a 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,9 @@ logs .qodo/ .vercel .roo/mcp.json + +# Qdrant +qdrant_storage/ + +# Architect plans +plans/ \ No newline at end of file diff --git a/.husky/pre-push b/.husky/pre-push index 3c206835b73..4cf91d95800 100644 --- a/.husky/pre-push +++ b/.husky/pre-push @@ -18,6 +18,19 @@ fi $pnpm_cmd run check-types +# Use dotenvx to securely load .env.local and run commands that depend on it +if [ -f ".env.local" ]; then + # Check if RUN_TESTS_ON_PUSH is set to true and run tests with dotenvx + if npx dotenvx get RUN_TESTS_ON_PUSH -f .env.local 2>/dev/null | grep -q "^true$"; then + npx dotenvx run -f .env.local -- $pnpm_cmd run test + fi +else + # Fallback: run tests if RUN_TESTS_ON_PUSH is set in regular environment + if [ "$RUN_TESTS_ON_PUSH" = "true" ]; then + $pnpm_cmd run test + fi +fi + # Check for new changesets. NEW_CHANGESETS=$(find .changeset -name "*.md" ! -name "README.md" | wc -l | tr -d ' ') echo "Changeset files: $NEW_CHANGESETS" diff --git a/.roo/commands/release.md b/.roo/commands/release.md index ec54b804d1d..2e09783a58e 100644 --- a/.roo/commands/release.md +++ b/.roo/commands/release.md @@ -1,10 +1,11 @@ --- description: "Create a new release of the Roo Code extension" argument-hint: patch | minor | major +mode: code --- 1. Identify the SHA corresponding to the most recent release using GitHub CLI: `gh release view --json tagName,targetCommitish,publishedAt` -2. Analyze changes since the last release using: `gh pr list --state merged --json number,title,author,url,mergedAt,closingIssuesReferences --limit 1000 -q '[.[] | select(.mergedAt > "TIMESTAMP") | {number, title, author: .author.login, url, mergedAt, issues: .closingIssuesReferences}] | sort_by(.number)'` +2. 
Analyze changes since the last release using: `gh pr list --state merged --base main --json number,title,author,url,mergedAt,closingIssuesReferences --limit 1000 -q '[.[] | select(.mergedAt > "TIMESTAMP") | {number, title, author: .author.login, url, mergedAt, issues: .closingIssuesReferences}] | sort_by(.number)'` 3. For each PR with linked issues, fetch the issue details to get the issue reporter: `gh issue view ISSUE_NUMBER --json number,author -q '{number, reporter: .author.login}'` 4. Summarize the changes. If the user did not specify, ask them whether this should be a major, minor, or patch release. 5. Create a changeset in .changeset/v[version].md instead of directly modifying package.json. The format is: @@ -16,25 +17,29 @@ argument-hint: patch | minor | major [list of changes] ``` -- Always include contributor attribution using format: (thanks @username!) -- For PRs that close issues, also include the issue number and reporter: "- Fix: Description (#123 by @reporter, PR by @contributor)" -- For PRs without linked issues, use the standard format: "- Add support for feature (thanks @contributor!)" +- Always include contributor attribution and the PR number: use "(PR # by @username)". 
+- For PRs that close issues, include both the issue number and the PR number and authors: "- Fix: Description (#123 by @reporter, PR #456 by @contributor)" +- For PRs without linked issues, include the PR number and author: "- Add support for feature (PR #456 by @contributor)" - Provide brief descriptions of each item to explain the change - Order the list from most important to least important - Example formats: - - With issue: "- Fix: Resolve memory leak in extension (#456 by @issueReporter, PR by @prAuthor)" - - Without issue: "- Add support for Gemini 2.5 Pro caching (thanks @contributor!)" + - With issue: "- Fix: Resolve memory leak in extension (#456 by @issueReporter, PR #789 by @prAuthor)" + - Without issue: "- Add support for Gemini 2.5 Pro caching (PR #789 by @contributor)" - CRITICAL: Include EVERY SINGLE PR in the changeset - don't assume you know which ones are important. Count the total PRs to verify completeness and cross-reference the list to ensure nothing is missed. -6. If a major or minor release: +6. If the generate_image tool is available, create a release image at `releases/[version]-release.png` + - The image should feature a realistic-looking kangaroo doing something human-like that relates to the main highlight of the release + - Pass `releases/template.png` as the reference image for aspect ratio and kangaroo style + - Add the generated image to .changeset/v[version].md before the list of changes with format: `![X.Y.Z Release - Description](/releases/X.Y.Z-release.png)` +7. 
If a major or minor release: - Ask the user what the three most important areas to highlight are in the release - Update the relevant English-version announcement files and documentation (webview-ui/src/components/chat/Announcement.tsx, README.md, and the `latestAnnouncementId` in src/core/webview/ClineProvider.ts) - Ask the user to confirm that the English version looks good to them before proceeding - Use the new_task tool to create a subtask in `translate` mode with detailed instructions on which content needs to be translated into all supported languages (the READMEs as well as the translation strings) -7. Create a new branch for the release preparation: `git checkout -b release/v[version]` -8. Commit and push the changeset file and any documentation updates to the repository: `git add . && git commit -m "chore: add changeset for v[version]" && git push origin release/v[version]` -9. Create a pull request for the release: `gh pr create --title "Release v[version]" --body "Release preparation for v[version]. This PR includes the changeset and any necessary documentation updates." --base main --head release/v[version]` -10. The GitHub Actions workflow will automatically: +8. Create a new branch for the release preparation: `git checkout -b release/v[version]` +9. Commit and push the changeset file and any documentation updates to the repository: `git add . && git commit -m "chore: add changeset for v[version]" && git push origin release/v[version]` +10. Create a pull request for the release: `gh pr create --title "Release v[version]" --body "Release preparation for v[version]. This PR includes the changeset and any necessary documentation updates." --base main --head release/v[version]` +11. 
The GitHub Actions workflow will automatically: - Create a version bump PR when changesets are merged to main - Update the CHANGELOG.md with proper formatting - Publish the release when the version bump PR is merged diff --git a/.roo/roomotes.yml b/.roo/roomotes.yml index af0cd85b401..0ea30b93af7 100644 --- a/.roo/roomotes.yml +++ b/.roo/roomotes.yml @@ -1,33 +1,6 @@ version: "1.0" commands: - - name: Pull latest changes - run: git pull - timeout: 60 - execution_phase: task_run - name: Install dependencies run: pnpm install timeout: 60 - execution_phase: task_run - -github_events: - - event: issues.opened - action: - name: github.issue.fix - - event: issue_comment.created - action: - name: github.issue.comment.respond - - event: pull_request.opened - action: - name: github.pr.review - - event: pull_request.opened - action: - name: general.task - prompt: | - 1. Run the script `node scripts/find-missing-translations.js` and carefully review its output for any missing translations. - 2. If the script reports missing translations, switch into `translate` mode and add them in all supported languages. - 3. If you've added new translations, commit and push them to the existing PR. - 4. If you get a permission error trying to push to the PR just give up (i.e don't create a new PR instead). - - event: pull_request_review_comment.created - action: - name: github.pr.comment.respond diff --git a/.roo/rules-docs-extractor/1_extraction_workflow.xml b/.roo/rules-docs-extractor/1_extraction_workflow.xml index 088fc1ae89f..c707fa78092 100644 --- a/.roo/rules-docs-extractor/1_extraction_workflow.xml +++ b/.roo/rules-docs-extractor/1_extraction_workflow.xml @@ -1,296 +1,99 @@ - The Docs Extractor mode has two primary functions: - 1. Extract technical and non-technical details about features to provide to documentation teams - 2. 
Verify existing documentation for factual accuracy against the codebase - - This mode does not generate final documentation but provides detailed analysis and verification. + The Docs Extractor mode has exactly two workflow paths: + 1) Verify provided documentation for factual accuracy against the codebase + 2) Generate source material for user-facing docs about a requested feature or aspect of the codebase + + Outputs are designed to support explanatory documentation (not merely descriptive): + - Capture why users need steps and why certain actions are restricted + - Surface constraints, limitations, and trade‑offs + - Provide troubleshooting playbooks (symptoms → causes → fixes → prevention) + - Recommend targeted visuals for complex states (not step‑by‑step screenshots) + + This mode does not generate final user documentation; it produces verification and source-material reports for docs teams. Parse Request - Identify the feature or component in the user's request. - Determine if the request is for extraction or verification. - For extraction: Note what level of detail is needed (technical vs non-technical). - For verification: Identify the documentation to be verified. + Identify the feature/aspect in the user's request. + Decide path: verification vs. source-material generation. + For source-material: capture audience (user or developer) and depth (overview vs task-focused). + For verification: identify the documentation to be verified (provided text/links/files). Note any specific areas to emphasize or check. - The mode branches into extraction or verification based on the request. Discover Feature - Locate relevant code using appropriate search methods. - Identify entry points and components. - Map the high-level architecture. - Use any combination of tools to understand the feature. + Locate relevant code and assets using appropriate discovery methods. + Identify entry points and key components that affect user experience. 
+ Map the high-level workflow a user follows. - Use the most effective discovery method for the situation - file exploration, search, or direct navigation. - - - Code Analysis - - - Analyze code structure -
- - Identify classes, functions, modules - - Extract method signatures, parameters - - Document return types, data structures - - Map inheritance and composition -
-
- - Extract APIs -
- - REST endpoints - - GraphQL schemas - - WebSocket events - - RPC interfaces -
-
- - Document configuration -
- - Environment variables - - Config files and schemas - - Feature flags - - Runtime parameters -
-
-
-
- - - UI/UX and User Experience Analysis - - - Analyze user interface components -
- - UI components and their interactions - - Forms, buttons, navigation elements - - Visual feedback and loading states - - Responsive design considerations - - Accessibility features -
-
- - Map user journeys and interactions -
- - Step-by-step user workflows - - Click paths and navigation flows - - User decision points - - Input validation and error messaging - - Success and failure scenarios -
-
- - Document user experience elements -
- - Page layouts and information architecture - - Interactive elements and their behaviors - - Tooltips, help text, and guidance - - Confirmation dialogs and warnings - - Progress indicators and status updates -
-
- - Capture visual and behavioral patterns -
- - Color schemes and theming - - Animation and transitions - - Keyboard shortcuts and accessibility - - Mobile vs desktop experiences - - Browser-specific considerations -
-
-
-
- - - Business Logic Extraction - - - Map workflows from user perspective -
- - User journey through the feature - - Decision points and branching - - State transitions visible to users - - Roles and permissions affecting UI -
-
- - Document business rules -
- - Validation logic and user feedback - - Formulas and algorithms - - Business process implementations - - Compliance requirements -
-
- - Identify use cases -
- - Primary use cases - - Edge cases - - Error scenarios and user recovery - - Performance factors affecting UX -
-
-
-
- - - Dependency Analysis - - - Map dependencies -
- - Third-party libraries - - External services and APIs - - Database connections - - Message queues -
-
- - Document integration points -
- - Incoming webhooks - - Outgoing API calls - - Event publishers/subscribers - - Shared data stores -
-
- - Analyze data flow -
- - Data sources and formats - - Data transformations - - Output formats and destinations - - Data retention policies -
-
-
-
- - - Test Analysis - - - Assess test coverage -
- - Unit test coverage - - Integration test scenarios - - End-to-end test flows - - Performance test results -
-
- - Document error handling -
- - Error types and codes - - Exception handling - - Fallback mechanisms - - Recovery procedures -
-
- - Identify quality metrics -
- - Code complexity - - Performance benchmarks - - Security vulnerabilities - - Maintainability scores -
-
-
-
- - - Security Analysis - - - Document security -
- - Auth mechanisms - - Access control - - Data encryption - - Security policies -
-
- - Identify vulnerabilities -
- - Known security issues - - Attack vectors - - Mitigation - - Best practices -
-
- - Check compliance -
- - Regulatory compliance (GDPR, etc.) - - Industry standards - - Audit trail requirements - - Data privacy -
-
-
-
-
+ + UI components and their interactions + User workflows and decision points + Configuration that changes user-visible behavior + Error states, messages, and recovery + Benefits, limits, prerequisites, and version notes + Why this exists: user goals, constraints, and design intent + “Cannot do” boundaries: permissions, invariants, and business rules + Troubleshooting: symptoms, likely causes, diagnostics, fixes, prevention + Common pitfalls and anti‑patterns (what to avoid and why) + Decision rationale and trade‑offs that affect user choices + Complex UI states that merit visuals (criteria for screenshots/diagrams) + - - Extract Feature Details - Analyze and extract comprehensive details for documentation team + + Generate Source Material for User-Facing Docs + Extract concise, user-oriented facts and structure them for documentation teams. - Compile Technical Details + Scope and Audience - List all technical components and their relationships - Document APIs, data structures, and algorithms - Extract configuration options and their impacts - Identify error handling and edge cases - Note performance characteristics and limitations + Confirm the feature/aspect and intended audience. + List primary tasks the audience performs with this feature. - Extract Non-Technical Information + Extract User-Facing Facts - Describe complete user experience and workflows - Document UI interactions and visual elements - Explain business logic in plain language - Identify user benefits and use cases - Document common scenarios with UI context - Note prerequisites and user-facing dependencies - Capture error messages and user guidance + Summarize what the feature does and key benefits. + Explain why users need this (jobs-to-be-done, outcomes) and when to use it. + Document step-by-step user workflows and UI interactions. + Capture configuration options that impact user behavior (name, default, effect). + Clarify constraints, limits, and “cannot do” cases with rationale. 
+ Identify common pitfalls and anti-patterns; include “Do/Don’t” guidance. + List common errors with user-facing messages, diagnostics, fixes, and prevention. + Record prerequisites, permissions, and compatibility/version notes. + Flag complex states that warrant visuals (what to show and why), not every step. - Create Extraction Report + Create Source Material Report - Organize findings into clear categories - Separate technical and non-technical information - Include code snippets and examples where helpful - Create `EXTRACTION-[feature].md` with findings - Highlight areas that need special attention in documentation + Organize findings using user-focused structure (benefits, use cases, how it works, configuration, FAQ, troubleshooting). + Include short code/UI snippets or paths where relevant. + Create `EXTRACTION-[feature].md` with findings. + Highlight items that need visuals (screenshots/diagrams). - - Executive summary of the feature - - UI/UX analysis and user experience - - Technical details section - - Non-technical/user-facing details + - Executive summary of the feature/aspect + - Why it matters (goals, value, when to use) - User workflows and interactions - - Configuration and setup information - - Common use cases with UI context - - Error handling and user guidance - - Potential documentation considerations + - Configuration and setup affecting users (with defaults and impact) + - Constraints and limitations (with rationale) + - Common scenarios and troubleshooting playbooks (symptoms → causes → fixes → prevention) + - Do/Don’t and anti‑patterns + - Recommended visuals (what complex states to illustrate and why) + - FAQ and tips + - Version/compatibility notes @@ -298,44 +101,43 @@ Verify Documentation Accuracy - Check existing documentation against codebase reality + Check provided documentation against codebase reality and actual UX. 
Analyze Provided Documentation - Parse the documentation to identify claims and descriptions - Extract technical specifications mentioned - Note user-facing features and workflows described - Identify configuration options and examples provided + Parse the documentation to identify claims and descriptions. + Extract technical or user-facing specifics mentioned. + Note workflows, configuration, and examples described. Verify Against Codebase - Check technical claims against actual implementation - Verify API endpoints, parameters, and responses - Confirm configuration options and defaults - Validate code examples and snippets - Check if described workflows match implementation + Check claims against actual implementation and UX. + Verify endpoints/parameters if referenced. + Confirm configuration options and defaults. + Validate code snippets and examples. + Ensure described workflows match implementation. Create Verification Report - Categorize findings by severity (Critical, Major, Minor) - List all inaccuracies with correct information - Identify missing important information - Note outdated or deprecated content - Provide specific corrections and suggestions - Create `VERIFICATION-[feature].md` with findings + Categorize findings by severity (Critical, Major, Minor). + List inaccuracies with the correct information. + Identify missing important information. + Provide specific corrections and suggestions. + Create `VERIFICATION-[feature].md` with findings. 
- Verification summary (Accurate/Needs Updates) - Critical inaccuracies that could mislead users - - Technical corrections needed - - Missing information that should be added + - Corrections and missing information + - Explanatory gaps (missing “why”, constraints, or decision rationale) + - Troubleshooting coverage gaps (missing symptoms/diagnostics/fixes/prevention) + - Visual recommendations (which complex states warrant screenshots/diagrams) - Suggestions for clarity improvements - - Overall recommendations @@ -343,13 +145,13 @@ - - All code paths analyzed - Technical details comprehensively extracted - Non-technical information clearly explained - Use cases and examples provided + + Audience and scope captured + User workflows and UI interactions documented + User-impacting configuration recorded + Common errors and troubleshooting documented Report organized for documentation team use - + All documentation claims verified Inaccuracies identified and corrected diff --git a/.roo/rules-docs-extractor/2_documentation_patterns.xml b/.roo/rules-docs-extractor/2_documentation_patterns.xml index ef1643d8a40..da743483dab 100644 --- a/.roo/rules-docs-extractor/2_documentation_patterns.xml +++ b/.roo/rules-docs-extractor/2_documentation_patterns.xml @@ -4,7 +4,7 @@ - # [Feature Name] [Description of what the feature does and why a user should care.] @@ -22,7 +22,7 @@ - [Pain point 1] - [Pain point 2] -**With this feature]**: [Description of the new experience.] +**With this feature**: [Description of the new experience.] ## How it Works @@ -58,9 +58,9 @@ - [Answer.] - [Optional tip.] - ]]> + - # [Feature Name] Technical Documentation ## Table of Contents @@ -71,28 +71,27 @@ 5. Configuration 6. User Guide 7. Developer Guide -8. Administrator Guide -9. Security -10. Performance -11. Troubleshooting -12. FAQ -13. Changelog -14. References - -[This template remains available for generating detailed technical documentation.] - ]]> +8. Security +9. Performance +10. 
Troubleshooting +11. FAQ +12. Changelog +13. References + +[Use this as an internal source-material outline for technical sections; not for final docs.] + - + @@ -101,7 +100,7 @@ - + @@ -123,7 +122,7 @@ - + - + - + Tutorials Use cases @@ -179,60 +178,31 @@ - - - Deployment - Monitoring - Security hardening - Backup and recovery - - - - - - - Business value - Capabilities and limits - Competitive advantages - Risk assessment - - - - + - + - + - + - + - + - + @@ -368,20 +338,20 @@ config: - + - + \ No newline at end of file diff --git a/.roo/rules-docs-extractor/3_analysis_techniques.xml b/.roo/rules-docs-extractor/3_analysis_techniques.xml index b9ef93d1f30..12b3d1fd266 100644 --- a/.roo/rules-docs-extractor/3_analysis_techniques.xml +++ b/.roo/rules-docs-extractor/3_analysis_techniques.xml @@ -1,55 +1,40 @@ - Techniques for analyzing code to extract documentation. + Heuristics for analyzing a codebase to extract reliable, user-facing documentation. + This file contains technique checklists only—no tool instructions or invocations. - - Find and analyze UI components and their interactions - - - - Search for UI component files - - -src -\.(tsx|jsx|vue)$|@Component|export.*component -*.tsx - - - - -src - - - ]]> - - - - Analyze styling and visual elements - - -src/styles -true - + Find and analyze UI components and their interactions + + Start from feature or route directories and enumerate components related to the requested topic. + Differentiate container vs presentational components; note composition patterns. + Trace inputs/outputs: props, state, context, events, and side effects. + Record conditional rendering that affects user-visible states. + + + Primary components and responsibilities. + Props/state/context that change behavior. + High-level dependency/composition map. + + - - -src -className=|style=|styled\.|makeStyles|@apply - - ]]> - - + + Analyze styling and visual elements + + Identify design tokens and utility classes used to drive layout and state. 
+ Capture responsive behavior and breakpoint rules that materially change UX. + Document visual affordances tied to state (loading, error, disabled). + + + Key classes/selectors influencing layout/state. + Responsive behavior summary and breakpoints. + - - Map user interactions and navigation flows - + Map user interactions and navigation flows Route definitions and navigation Form submissions and validations @@ -57,31 +42,20 @@ State changes and UI updates Loading and error states - - -src -Route.*path=|router\.push|navigate\(|Link.*to= - - - - -src -onClick=|onSubmit=|onChange=|handleClick|handleSubmit - - - - -src -validate|validation|required|pattern=|minLength|maxLength - - ]]> + + Outline entry points and expected outcomes for each primary flow. + Summarize validation rules and failure states the user can encounter. + Record redirects and deep-link behavior relevant to the feature. + + + Flow diagrams or bullet sequences for main tasks. + Validation conditions and error messages. + Navigation transitions and guards. + - - Analyze how the system communicates with users - + Analyze how the system communicates with users Error messages and alerts Success notifications @@ -90,31 +64,19 @@ Confirmation dialogs Progress indicators - - -src -toast|notification|alert|message|error.*message|success.*message - - - - -src -loading|isLoading|pending|spinner|skeleton|placeholder - - - - -src -modal|dialog|confirm|popup|overlay - - ]]> + + Map message triggers to the user actions that cause them. + Capture severity, persistence, and dismissal behavior. + Note localization or accessibility considerations in messages. + + + Catalog of messages with purpose and conditions. + Loading/progress patterns and timeouts. 
+ - - Check for accessibility features and compliance - + Check for accessibility features and compliance ARIA labels and roles Keyboard navigation support @@ -122,25 +84,17 @@ Focus management Color contrast considerations - - -src -aria-|role=|tabIndex|alt=|title=|accessibilityLabel - - - - -src -focus\(|blur\(|onFocus|onBlur|autoFocus|focusable - - ]]> + + Confirm interactive elements have clear focus and labels. + Describe keyboard-only navigation paths for core flows. + + + Accessibility gaps affecting task completion. + - - Analyze responsive design and mobile experience - + Analyze responsive design and mobile experience Breakpoint definitions Mobile-specific components @@ -148,230 +102,88 @@ Viewport configurations Media queries - - -src -@media|breakpoint|mobile|tablet|desktop|responsive - - - - -src -onTouch|swipe|gesture|tap|press - - ]]> + + Summarize layout changes across breakpoints that alter workflow. + Note touch targets and gestures required on mobile. + + + Table of key differences per breakpoint. + - - - Use semantic search to find conceptually related code when available. - - - Finding code by concept rather than keywords - Discovering implementations across different naming conventions - When pattern-based search isn't finding expected results - - - -user authentication login security JWT token validation - - - - -payment processing transaction billing invoice checkout - - ]]> - This is an optional tool - use when semantic understanding would help find related code that keyword search might miss - - - - Analyze entry points to understand feature flow. - + Understand feature entry points and control flow - Find main functions, controllers, or route handlers. - Trace execution flow. - Map decision branches. - Document input validation. + Identify main functions, controllers, or route handlers. + Trace execution and decision branches. + Document input validation and preconditions. 
- - - Start by exploring directory structure - - -src -false - - - - -src/controllers -true - - ]]> - - - - Search for specific patterns - - -src -(app\.(get|post|put|delete)|@(Get|Post|Put|Delete)|router\.(get|post|put|delete)) - - ]]> - - - - Read known entry points directly - - -src/app.ts - - - - -src/controllers/feature.controller.ts - - ]]> - - - - Use semantic search as an alternative discovery method - - -main entry point application startup initialization bootstrap - - ]]> - - + + Entry points list and short purpose statements. + Decision matrix or flow sketch. + - - Extract API specifications from code. - + Extract API specifications from code - - - HTTP method - - Route path - - Path/query parameters - - Request/response schemas - - Status codes + HTTP method and route path + Path/query parameters + Request/response schemas + Status codes and error bodies - - - Schema and input types - - Resolvers - - Return types - - Field arguments + Schema and input types + Resolvers and return types + Field arguments and constraints - - Map dependencies and integration points. - + Map dependencies and integration points - Import/require statements - package.json dependencies - External API calls - DB connections - Message queue integrations - Filesystem operations + Imports and module boundaries + Package and runtime dependencies + External API/SDK usage + DB connections and migrations + Messaging/queue/event streams + Filesystem or network side effects - - - Start with package.json to understand dependencies - - -package.json - - ]]> - - - - Follow import chains to map dependencies - - -src -^import\s+.*from\s+['"]([^'"]+)['"]|require\s*\(\s*['"]([^'"]+)['"]\s*\) - - ]]> - - - - Find external API integrations - - -src -(fetch|axios|http\.request|request\(|\.get\(|\.post\() - - ]]> - - + + Dependency graph summary and hot spots. + List of external integrations and auth methods. + - - Extract data models, schemas, and type definitions. 
- + Extract data models, schemas, and type definitions - - - interfaces, types, classes, enums - + - interfaces, types, classes, enums - - - Schema definitions, migration files, ORM models - + - Schema definitions, migration files, ORM models - - - JSON Schema, Joi/Yup/Zod schemas, validation decorators - + - JSON Schema, Joi/Yup/Zod schemas, validation decorators - - -src -^export\s+(interface|type|class|enum)\s+(\w+) - - - - -src/models -@(Entity|Table|Model)|class\s+\w+\s+extends\s+(Model|BaseEntity) - - ]]> + + Canonical definitions and field constraints. + Entity relationships and ownership. + - - Identify and document business rules. - + Identify and document business rules Complex conditionals Calculation functions @@ -380,79 +192,49 @@ type\s+(Query|Mutation|Subscription)\s*{[^}]+}|@(Query|Mutation|Resolver) Domain-specific constants and algorithms - Why logic exists (business need) - When logic applies (conditions) - What logic does (transformation) - Edge cases + Why the logic exists (business need) + When the logic applies (conditions) + What the logic does (transformation) + Edge cases and invariants Impact of changes - - Document error handling and recovery. - + Document error handling and recovery - try/catch blocks, error boundaries - Custom error classes - Error codes and messages + try/catch blocks and error boundaries + Custom error classes and codes Logging, fallbacks, retries, circuit breakers - - -src -try\s*{|catch\s*\(|throw\s+new|class\s+\w*Error\s+extends - - - - -src -ERROR_|_ERROR|ErrorCode|errorCode - - ]]> + + Error taxonomy and user-facing messages. + Recovery/rollback strategies and timeouts. + - - Identify security measures and vulnerabilities. 
- + Identify security measures and vulnerabilities - - - - JWT, sessions, OAuth, API keys - - - - - - RBAC, permission checks, ownership validation - - - - - - Encryption, hashing, sensitive data handling - - - - - - Sanitization, SQLi/XSS/CSRF prevention - - + JWT, sessions, OAuth, API keys + RBAC, permission checks, ownership validation + Encryption, hashing, sensitive data handling + Sanitization and injection prevention + + Threat surfaces and mitigations relevant to the feature. + - - Identify performance factors and optimization opportunities. - + Identify performance factors and optimization opportunities - DB query patterns (N+1) + Expensive loops/algorithms + DB query patterns (e.g., N+1) Caching strategies - Async usage - Batch processing - Resource pooling - Memory management - Algorithm complexity + Concurrency and async usage + Batching and resource pooling + Memory management and object lifetimes Time/space complexity @@ -464,51 +246,32 @@ type\s+(Query|Mutation|Subscription)\s*{[^}]+}|@(Query|Mutation|Resolver) - - Analyze test coverage. - + Assess test coverage at a useful granularity - __tests__, *.test.ts, *.spec.ts - Function coverage + Function-level coverage and edge cases - integration/, e2e/ - Workflow coverage + Workflow coverage and contract boundaries - api-tests/, *.api.test.ts - Endpoint coverage + Endpoint success/failure paths and schemas - - -src -\.(test|spec)\.(ts|js|tsx|jsx)$ -*.test.ts - - - - -src -(describe|it|test)\s*\(\s*['"`]([^'"`]+)['"`] - - ]]> + + List of critical behaviors missing tests. + - - Extract configuration options and their impacts. 
- + Extract configuration options and their impacts .env files, config files, CLI args, feature flags - Default values - Valid values - Behavior impact - Config dependencies + Default values and valid ranges + Behavioral impact of each option + Dependencies between options Security implications @@ -516,54 +279,49 @@ type\s+(Query|Mutation|Subscription)\s*{[^}]+}|@(Query|Mutation|Resolver) - - Map user workflows through the feature. - + Map user workflows through the feature - Identify entry points (UI, API, CLI). - Trace user actions. - Document decision points. - Map data transformations. - Identify outcomes. + Identify entry points (UI, API, CLI) + Trace user actions and decision points + Map data transformations + Identify outcomes and completion criteria - Flow diagrams, procedures, decision trees, state diagrams. + Flow diagrams, procedures, decision trees, state diagrams - - Document integration with other systems. - + Document integration with other systems - Sync API calls, async messaging, events, batch processing, streaming. + Sync API calls, async messaging, events, batch processing, streaming - Protocols, auth, error handling, data transforms, SLAs. + Protocols, auth, error handling, data transforms, SLAs + Summarize version constraints and compatibility - package.json, READMEs, migration guides, breaking changes docs. + package manifests, READMEs, migration guides, breaking changes docs - - -. -"engines":|"peerDependencies":|requires?\s+\w+\s+version|compatible\s+with - - ]]> + + Minimum/recommended versions and notable constraints. + + Track deprecations and migrations - @deprecated, TODO comments, legacy code markers. + Explicit deprecation notices and TODO markers + Legacy code paths and adapters - Deprecation date, removal timeline, migration path, alternatives. 
+ Deprecation date and removal timeline + Migration path and alternatives @@ -571,17 +329,20 @@ type\s+(Query|Mutation|Subscription)\s*{[^}]+}|@(Query|Mutation|Resolver) - Public APIs documented. - Examples for complex features. - Error scenarios covered. - Config options explained. - Security addressed. + Public APIs documented with inputs/outputs and errors + Examples for complex features + Error scenarios covered with recovery guidance + Config options explained with defaults and impacts + Security considerations addressed - - Cyclomatic complexity, code duplication, test coverage, doc coverage, tech debt. + Cyclomatic complexity + Code duplication + Test coverage and gaps + Documentation coverage for user-visible behaviors + Known technical debt affecting UX diff --git a/.roo/rules-docs-extractor/6_communication_guidelines.xml b/.roo/rules-docs-extractor/4_communication_guidelines.xml similarity index 89% rename from .roo/rules-docs-extractor/6_communication_guidelines.xml rename to .roo/rules-docs-extractor/4_communication_guidelines.xml index 8691f2519cb..43ec8479fc6 100644 --- a/.roo/rules-docs-extractor/6_communication_guidelines.xml +++ b/.roo/rules-docs-extractor/4_communication_guidelines.xml @@ -16,17 +16,6 @@ The user explicitly asks for options. - -Found multiple auth systems. Which to document? - -JWT-based system (src/auth/jwt/*) -OAuth2 integration (src/auth/oauth/*) -Basic auth middleware (src/middleware/basic-auth.ts) -All of them - - - ]]> @@ -62,7 +51,7 @@ - + - + @@ -116,7 +105,7 @@ See the full verification report for detailed corrections and suggestions. Always specify language for syntax highlighting (e.g., typescript, json, bash). Include file paths as comments where relevant. - ```typescript // src/auth/auth.service.ts export class AuthService { @@ -125,19 +114,19 @@ export class AuthService { } } ``` - ]]> + Use tables for structured data like configs. Include headers and align columns. Keep cell content brief. 
- | Variable | Type | Default | Description | |----------|------|---------|-------------| | `JWT_SECRET` | string | - | Secret key for JWT signing | | `JWT_EXPIRATION` | string | '15m' | Token expiration time | - ]]> + @@ -180,14 +169,14 @@ export class AuthService { - --- Feature: Authentication System Version: 2.1.0 Last Updated: 2024-01-15 Status: Stable --- - ]]> + @@ -208,16 +197,11 @@ Status: Stable Standard programming terms. Code snippets, implementation details. - + Instructional, step-by-step. Simple language, no jargon. Screenshots, real-world scenarios. - - Operational focus. - IT/DevOps terms. - CLI examples, configs. - @@ -229,7 +213,7 @@ Status: Stable Recommended next steps. - Feature extraction complete for the authentication system. **Extraction Report**: `EXTRACTION-authentication-system.md` @@ -252,9 +236,9 @@ Feature extraction complete for the authentication system. - Error messages need user-friendly translations The extraction report contains all details needed for comprehensive documentation. - ]]> + - Documentation verification complete for the authentication system. **Verification Report**: `VERIFICATION-authentication-system.md` @@ -271,7 +255,7 @@ Documentation verification complete for the authentication system. **Clarity Improvements**: 3 suggestions Please review the verification report for specific corrections needed. - ]]> + diff --git a/.roo/rules-docs-extractor/4_tool_usage_guide.xml b/.roo/rules-docs-extractor/4_tool_usage_guide.xml deleted file mode 100644 index 50499172730..00000000000 --- a/.roo/rules-docs-extractor/4_tool_usage_guide.xml +++ /dev/null @@ -1,397 +0,0 @@ - - - Guidance on using tools for documentation extraction. 
- - - - - Use the most appropriate tools for the situation - - Start with what you know - file names, directory structure, or keywords - Use multiple discovery methods to build understanding - Adapt your approach based on the codebase structure - - - - - - Explore directory structure and find relevant files - - - Starting exploration of a feature area - - Understanding project organization - - Finding configuration or test files - - - - - Examine specific files in detail - - - Analyzing implementation details - - Understanding configuration - - Reading documentation or comments - - Read multiple related files together for better context - - - - Find specific patterns or text - - - Locating API endpoints - - Finding configuration usage - - Tracking down error handling - - Discovering cross-references - - - - - Get overview of code structure - - - Understanding module organization - - Identifying main components - - Finding test coverage - - - - - Semantic search when available - - - Finding conceptually related code - - Discovering implementations by functionality - - When keyword search isn't sufficient - - Optional - use when semantic understanding is needed - - - - - - Start from high-level structure and drill down - - List files in feature directory - Identify main entry points - Follow imports and dependencies - Examine implementation details - - - - - Use tests to understand expected behavior - - Find test files for the feature - Read test descriptions and scenarios - Trace back to implementation - Verify behavior matches tests - - - - - Start with configuration to understand setup - - Find configuration files - Identify feature flags and settings - Trace usage in code - Document impacts of each setting - - - - - Map external interfaces first - - Search for route definitions - Find API controllers or handlers - Trace to business logic - Document request/response flow - - - - - - - - Create extraction or verification report files. 
- Generates reports for documentation teams, not final documentation. - - - For extraction: EXTRACTION-[feature-name].md - - For verification: VERIFICATION-[feature-name].md - - - Use descriptive feature name in filename. - Include table of contents. - Use consistent Markdown formatting. - Include syntax-highlighted code examples. - - -EXTRACTION-authentication-system.md - -# Authentication System Documentation - -## Table of Contents -1. [Overview](#overview) -2. [Architecture](#architecture) -... - -## Overview -The authentication system provides secure user authentication using JWT tokens... - -... - - ]]> - - - - Clarify ambiguous requirements. - - Multiple features have similar names. - Documentation depth is unclear. - Audience priorities are undefined. - - - -Which authentication aspects should be the focus? - -The complete flow (JWT, sessions, OAuth). -Only JWT implementation and validation. -Only OAuth2 integration. -Password reset and recovery workflows. - - - ]]> - -What level of technical detail is needed? - -High-level overview for all audiences. -Detailed developer implementation. -API reference with code examples. -Full coverage for all audiences. - - - ]]> - - - - - - - - Find all files related to a feature using various methods. - - - - Start by exploring likely directories - -src -false - - - - -src/features/[feature-name] -true - - ]]> - - - - Search for feature-related patterns - - -src -feature-name|FeatureName - - - - -src -describe\(['"].*Feature.*['"]|test\(['"].*feature.*['"] -*.test.ts - - ]]> - - - - Find configuration files - - -config -true - - - - -. -feature.*config|settings.*feature -*.json - - ]]> - - - - Use semantic search if available and helpful - - -feature implementation main logic - - ]]> - This is optional - use when other methods aren't sufficient - - - - - - - Follow import chains to map dependencies. - - - Read main file. - Extract all imports. - Read each imported file. - Recursively analyze imports. 
- Build dependency graph. - - - -src/feature -import\s+(?:{[^}]+}|\*\s+as\s+\w+|\w+)\s+from\s+['"]([^'"]+)['"] - - - - -src/feature -require\(['"]([^'"]+)['"]\) - - ]]> - - - - - Extract API documentation from code. - - - Route definitions, request/response schemas, auth requirements, rate limiting, error responses. - - - - Find route files. - Extract route definitions. - Find controllers. - Analyze request validation. - Document response formats. - - - - - - - Use tests to document expected behavior. - - - Tests provide usage examples. - Test descriptions explain functionality. - Tests cover edge cases. - Tests document expected outputs. - - - -__tests__ -(describe|it|test)\(['"]([^'"]+)['"] - - - - -__tests__/feature.test.ts - - ]]> - - - - - - - .env.example - config/*.json - src/config/* - README.md (configuration section) - - - - - - - Custom error classes - Error code constants - Error message templates - HTTP status codes - - -src -class\s+\w*Error\s+extends|new Error\(|throw new|ERROR_CODE|HTTP_STATUS - - ]]> - - - - - Authentication methods - Authorization rules - Data encryption - Input validation - Rate limiting - - -src -@Authorized|requireAuth|checkPermission|encrypt|decrypt|sanitize|validate|rateLimit - - ]]> - - - - - - Organize output for navigation. - - - Clear hierarchy, consistent headings, ToC with links, cross-references. - - - - - Include relevant code examples. - - - Use syntax highlighting, show request/response, include error cases. - - - - - Suggest diagrams where helpful. - - - Architecture, sequence, data flow, state machine diagrams. - - - - - Include important metadata. - - - Version compatibility, last updated, status, performance, security. 
- - - - \ No newline at end of file diff --git a/.roo/rules-docs-extractor/5_complete_extraction_examples.xml b/.roo/rules-docs-extractor/5_complete_extraction_examples.xml deleted file mode 100644 index 8c644e2f03a..00000000000 --- a/.roo/rules-docs-extractor/5_complete_extraction_examples.xml +++ /dev/null @@ -1,881 +0,0 @@ - - - Examples of both documentation extraction and verification workflows demonstrating flexible discovery methods and comprehensive UI/UX analysis. - - - - - Extract comprehensive documentation for a JWT-based authentication system, including technical implementation, UI/UX elements, and user workflows. - - - - - Initialize and discover feature using flexible methods - - -src -false - - ]]> - Look for auth-related directories like auth/, authentication/, or security/ - -src/auth -true - - ]]> - - - Auth controllers, services, middleware, models, and routes - - Login components and forms - - Session management UI - - - - - Analyze code structure and architecture - -src/auth - - ]]> - - - Identify main classes/functions - - Map authentication flow - - Find token generation/validation logic - - Locate UI components - - - - - Read core implementation files - - - - src/auth/auth.controller.ts - - - src/auth/auth.service.ts - - - src/auth/jwt.strategy.ts - - - src/auth/auth.guard.ts - - - src/models/user.model.ts - - - - ]]> - - - - UI/UX Analysis - Discover UI components - - -src/components -(Login|Auth|Session|Password).*\.(tsx?|jsx?) 
-*.tsx - - ]]> - - - - src/components/LoginForm.tsx - - - src/components/SessionManager.tsx - - - src/components/PasswordReset.tsx - - - - ]]> - - - - UI/UX Analysis - Map user interactions - -src/components -onClick|onSubmit|onChange|handleSubmit|validate - - ]]> - - - Form validation patterns - - User feedback mechanisms - - Error handling UI - - Loading states - - - - - UI/UX Analysis - Visual patterns and accessibility - -src -className=|style=|theme\.|aria-|role= -*.tsx - - ]]> - -src/styles -\.login|\.auth|\.session -*.css - - ]]> - - - - Extract API endpoints and configuration - -src/auth -@(Post|Get)\(['"]\/auth\/[^'"]+['"]|router\.(post|get)\(['"]\/auth\/[^'"]+['"] - - ]]> - -src -JWT_SECRET|JWT_EXPIRATION|AUTH_.*|process\.env\.\w*AUTH\w* - - ]]> - - - - Optional: Use semantic search for deeper insights - - -authentication error handling user feedback messages - - ]]> - Used when specific patterns don't capture all relevant error handling - - - - Create comprehensive extraction report - -EXTRACTION-authentication-system.md - -# Authentication System - Feature Extraction Report - -## Executive Summary -The authentication system is a JWT-based security implementation that handles user registration, login, session management, and access control. It provides a comprehensive user experience with visual feedback, accessibility features, and robust error handling. - -## UI/UX Analysis - -### User Interface Components - -#### 1. 
Login Page (`src/components/LoginForm.tsx`) -**Visual Layout:** -- Centered card design with shadow (400px width) -- Company logo at top -- Form fields with floating labels -- Primary blue theme (#1976d2) - -**Interactive Elements:** -- Email input field - - Real-time validation (regex: /^[^\s@]+@[^\s@]+\.[^\s@]+$/) - - Error state: Red border and helper text - - Success state: Green checkmark icon -- Password field - - Show/hide toggle button (eye icon) - - Minimum 8 characters validation - - Caps lock warning indicator -- "Remember me" checkbox with 30-day persistence -- "Forgot password?" link (underlined on hover) -- Submit button - - Disabled state: Gray background until valid input - - Loading state: Spinner replaces text - - Success state: Checkmark animation - -**User Feedback:** -- Loading overlay with spinner during authentication -- Error messages appear with slide-down animation -- Success toast notification (3s duration) -- Form shake animation on error - -#### 2. Registration Form (`src/components/RegisterForm.tsx`) -**Multi-Step Design:** -- Progress bar showing 3 steps -- Smooth slide transitions between steps -- Back/Next navigation buttons - -**Step 1 - Account Info:** -- Email field with async availability check -- Password field with strength meter (5 levels) -- Password confirmation with match validation - -**Step 2 - Personal Info:** -- First/Last name fields -- Optional phone with format mask -- Country dropdown with flag icons - -**Step 3 - Terms & Submit:** -- Terms of service scrollable text -- Privacy policy link (opens modal) -- Checkbox required for submission -- Review summary before final submit - -**Visual Feedback:** -- Field validation on blur -- Progress saved in localStorage -- Success confetti animation -- Auto-redirect countdown (5s) - -#### 3. 
Session Management (`src/components/SessionManager.tsx`) -**Device List UI:** -- Card-based layout for each session -- Device icons (FontAwesome) - - fa-mobile for mobile - - fa-desktop for desktop - - fa-tablet for tablet -- Information displayed: - - Device name and browser - - IP address (partially masked) - - Last active (relative time) - - Location (city, country) - -**Interactive Features:** -- Current device highlighted with blue border -- Hover state shows "Revoke" button -- Confirmation modal with device details -- Bulk selection with checkboxes -- "Revoke All" with double confirmation - -### User Experience Elements - -#### Visual Patterns -**Theme System:** -```css ---primary-color: #1976d2; ---error-color: #d32f2f; ---success-color: #388e3c; ---warning-color: #f57c00; ---text-primary: rgba(0, 0, 0, 0.87); ---text-secondary: rgba(0, 0, 0, 0.6); -``` - -**Animations:** -- Page transitions: 300ms ease-in-out -- Button hover: scale(1.02) -- Error shake: 0.5s horizontal -- Success checkmark: SVG path animation -- Loading spinner: 1s rotation - -**Responsive Breakpoints:** -- Mobile: < 768px (single column) -- Tablet: 768px - 1024px -- Desktop: > 1024px - -#### Accessibility Features -**Keyboard Navigation:** -- Tab order follows visual flow -- Enter key submits forms -- Escape closes modals -- Arrow keys in dropdowns - -**Screen Reader Support:** -- ARIA labels on all inputs -- Live regions for errors -- Role attributes for custom components -- Descriptive button text - -**Visual Accessibility:** -- 4.5:1 contrast ratio minimum -- Focus indicators (2px outline) -- Error icons for colorblind users -- Scalable fonts (rem units) - -### User Workflows - -#### 1. 
First-Time Registration -``` -Start → Landing Page → "Get Started" CTA - ↓ -Registration Form (Step 1) - → Email validation (async) - → Password strength check - → Real-time feedback - ↓ -Personal Info (Step 2) - → Optional fields clearly marked - → Format validation - ↓ -Terms Agreement (Step 3) - → Must scroll to enable checkbox - → Review summary - ↓ -Submit → Loading → Success - → Confetti animation - → Welcome email sent - → Auto-redirect (5s) - ↓ -Dashboard (First-time tour) -``` - -#### 2. Returning User Login -``` -Start → Login Page - ↓ -Enter Credentials - → Email autocomplete - → Password manager integration - → "Remember me" option - ↓ -Submit → Loading (avg 1.2s) - ↓ -Success → Dashboard - OR -Error → Inline feedback - → Retry with guidance - → "Forgot password?" option -``` - -#### 3. Password Reset Flow -``` -Login Page → "Forgot password?" - ↓ -Modal Dialog - → Email input - → Captcha (if multiple attempts) - ↓ -Submit → "Check email" message - ↓ -Email Received (< 1 min) - → Secure link (1hr expiry) - ↓ -Reset Page - → New password requirements shown - → Strength meter - → Confirmation field - ↓ -Submit → Success → Login redirect -``` - -## Technical Details - -### Core Components -1. **AuthController** (`src/auth/auth.controller.ts`) - - REST endpoints with validation decorators - - Rate limiting middleware - - CORS configuration - -2. **AuthService** (`src/auth/auth.service.ts`) - - JWT token generation/validation - - Bcrypt password hashing - - Session management logic - -3. 
**Security Implementation** - - JWT RS256 algorithm - - Refresh token rotation - - CSRF double-submit cookies - - XSS protection headers - -### API Endpoints -| Method | Endpoint | Description | Rate Limit | -|--------|----------|-------------|------------| -| POST | /auth/register | New user registration | 3/hour | -| POST | /auth/login | User authentication | 5/min | -| POST | /auth/refresh | Token refresh | 10/min | -| POST | /auth/logout | Session termination | None | -| GET | /auth/profile | Current user data | None | -| POST | /auth/reset-password | Password reset | 3/hour | - -### Configuration -```env -# Required -JWT_SECRET=minimum-32-character-secret -DATABASE_URL=postgresql://... - -# Optional with defaults -JWT_EXPIRATION=15m -REFRESH_TOKEN_EXPIRATION=7d -BCRYPT_ROUNDS=10 -SESSION_MAX_AGE=30d -MAX_SESSIONS_PER_USER=5 -``` - -## Non-Technical Information - -### Business Rules -1. **Account Creation** - - Unique email required - - Password: 8+ chars, mixed case, number, special - - Email verification within 24 hours - - Terms acceptance mandatory - -2. **Session Management** - - Max 5 concurrent sessions - - Idle timeout: 30 minutes - - Absolute timeout: 7 days - - Device trust for 30 days - -3. 
**Security Policies** - - Account lockout: 5 failed attempts (15 min) - - Password history: Last 3 not reusable - - 2FA optional but recommended - - Suspicious login notifications - -### Common User Scenarios - -#### Mobile Experience -- Touch-optimized buttons (44px min) -- Biometric login (Face ID/Touch ID) -- Simplified navigation menu -- Offline detection with retry -- Push notification for new sessions - -#### Error Recovery -- Network timeout: Auto-retry with backoff -- Session expired: Smooth re-login flow -- Form errors: Contextual help text -- Server errors: Friendly messages with support link - -### Performance Metrics -- Login response: 200ms (p50), 500ms (p95) -- Page load: 1.2s (3G), 400ms (4G) -- Token validation: < 10ms -- Session check: < 50ms - -## Documentation Recommendations - -### Critical Areas for User Documentation -1. **Getting Started Guide** - - Screenshots of each registration step - - Common email provider settings - - Password manager setup - -2. **Troubleshooting Section** - - "Why can't I log in?" flowchart - - Browser compatibility matrix - - Cookie/JavaScript requirements - -3. **Security Best Practices** - - How to spot phishing attempts - - Importance of unique passwords - - When to revoke sessions - -### Developer Integration Guide -1. **API Authentication** - - Bearer token format - - Refresh token flow diagram - - Error response examples - -2. **SDK Examples** - - JavaScript/TypeScript - - Python - - Mobile (iOS/Android) - -## Integration Points -- Email service for password reset and notifications -- Session storage (Redis optional, in-memory default) -- Rate limiting middleware -- CORS configuration for cross-origin requests -- Logging service for audit trails - -## Summary for Documentation Team -This extraction report provides comprehensive details about the authentication system's current implementation. 
The system offers a complete user experience with visual feedback, accessibility features, and robust security measures. Key areas for user documentation include the multi-step registration process, session management features, and security policies. The technical implementation uses industry-standard JWT tokens with proper security measures. - -354 - - ]]> - - - - - Use flexible discovery methods - directory exploration and pattern search are primary tools - Comprehensive UI/UX analysis is essential for complete documentation - Document visual elements, interactions, and user feedback mechanisms - Include accessibility and responsive design considerations - Semantic search (codebase_search) is optional for deeper insights when needed - Organize findings to support documentation team's work - - - - - - Verify user-provided documentation about authentication UI/UX against the actual implementation. - - - - - User provides documentation to verify - - "Please verify our login documentation: - - 'The login form provides a seamless experience with instant validation. Users see a green checkmark when their email is valid. The password field includes a strength indicator. 
After 3 failed attempts, users must wait 5 minutes before trying again.'" - - - - - Analyze the documentation claims - - Claims to verify: - - Instant email validation with green checkmark - - Password strength indicator on login - - 3 failed attempts triggers 5-minute wait - - - - - Find login UI components using directory exploration - -src/components -false - - ]]> - -src/components -Login|SignIn|Auth.*Form -*.tsx - - ]]> - - - - Examine login form implementation - - - - src/components/LoginForm.tsx - - - src/components/LoginForm.css - - - - ]]> - - - Email validation exists but shows red X, not green checkmark - - No password strength indicator on login form (only on registration) - - Account lockout is after 5 attempts, not 3 - - Lockout duration is 15 minutes, not 5 - - - - - Verify validation behavior - -src/components -validateEmail|emailValid|checkmark|strength.*indicator - - ]]> - -src/auth -failed.*attempts|lockout|rate.*limit - - ]]> - - - - Create verification report - -VERIFICATION-login-documentation.md - -# Login Documentation - Verification Report - -## Overall Assessment: Needs Significant Updates - -## Summary -The login documentation contains several inaccuracies about the UI/UX implementation that could confuse users and lead to poor user experience. - -## Critical Inaccuracies - -### 1. Email Validation Feedback -**Documentation States**: "Users see a green checkmark when their email is valid" -**Actual Implementation**: Red X icon for invalid emails, no positive feedback -- Location: `src/components/LoginForm.tsx:45-52` -- Current behavior: Only shows error states -- Missing: Success indicator implementation -- **Impact**: Users don't get positive reinforcement for correct input - -### 2. 
Password Strength Indicator -**Documentation States**: "The password field includes a strength indicator" -**Actual Implementation**: No strength indicator on login form -- Location: Checked `src/components/LoginForm.tsx` -- Strength indicator only exists in `RegisterForm.tsx` -- Login form has basic password field only -- **Impact**: Sets incorrect user expectations - -### 3. Failed Login Attempts -**Documentation States**: "After 3 failed attempts, users must wait 5 minutes" -**Actual Implementation**: 5 attempts, 15-minute lockout -- Location: `src/auth/services/auth.service.ts:234` -- Code: `MAX_LOGIN_ATTEMPTS = 5` -- Code: `LOCKOUT_DURATION = 15 * 60 * 1000 // 15 minutes` -- **Impact**: Users may be confused about actual security policy - -## UI/UX Elements Found But Not Documented - -### 1. Loading States -- Spinner overlay during authentication -- Button disabled state during submission -- These improve perceived performance - -### 2. Remember Me Feature -- 30-day persistent session option -- Stored in secure cookie -- Not mentioned in documentation - -### 3. Caps Lock Warning -- Visual indicator when caps lock is on -- Helps prevent login failures -- Important accessibility feature - -## Recommendations - -### Immediate Corrections Needed -1. **Email Validation**: - ``` - CORRECT: "The email field shows a red error icon for invalid formats. Valid emails are accepted without visual confirmation." - ``` - -2. **Password Features**: - ``` - CORRECT: "The login form has a standard password field with show/hide toggle. Password strength indicators are only shown during registration." - ``` - -3. **Security Policy**: - ``` - CORRECT: "After 5 failed login attempts, accounts are temporarily locked for 15 minutes to prevent brute force attacks." - ``` - -### Suggested Additions -1. Document the loading states and visual feedback -2. Explain the "Remember Me" functionality -3. Mention the Caps Lock warning feature -4. 
Add screenshots of actual UI states - -## Technical References -- Login Form Component: `src/components/LoginForm.tsx` -- Styles: `src/components/LoginForm.css` -- Auth Service: `src/auth/services/auth.service.ts` -- Rate Limiting: `src/middleware/rateLimiter.ts` - -## Visual Discrepancies -- Documentation implies more visual feedback than implemented -- Color scheme in docs doesn't match actual theme -- Missing documentation for error message styling - -## Conclusion -The documentation needs updates to accurately reflect the current login experience. The actual implementation is more security-focused (5 attempts, 15-min lockout) but less visually rich (no success indicators) than documented. - -95 - - ]]> - - - - - Always verify UI/UX claims against actual component implementation - Check both visual elements and behavioral logic - Look for undocumented features that users might expect - Provide specific corrections with code references - Consider the impact of inaccuracies on user experience - - - - - - Extract comprehensive API documentation including endpoints, request/response formats, and error handling. 
- - - - - Discover API structure using flexible methods - - -src -false - - ]]> - -src/api -true - - ]]> - - - - Find all API routes using pattern search - -src -(app|router)\.(get|post|put|patch|delete|all)\s*\(\s*['"`]([^'"`]+)['"`] - - ]]> - - - - Extract request validation schemas - -src -@(Body|Query|Param|Headers)\(|joi\.object|yup\.object|zod\.object - - ]]> - - - - Analyze error handling and responses - -src -@ApiResponse|response\.status\(|res\.status\(|throw new.*Error - - ]]> - - - - Optional: Semantic search for middleware and auth - - -API middleware authentication authorization guards - - ]]> - - - - Generate API extraction report - - - Complete endpoint inventory with methods and paths - - Request/response schemas with examples - - Authentication requirements per endpoint - - Rate limiting and throttling rules - - Error response formats and codes - - API versioning strategy - - - - - - - - Document a React component library including props, styling, accessibility, and usage patterns. 
- - - - - Discover component structure - -src/components -true - - ]]> - - - - Analyze component interfaces and props - -src/components -interface\s+\w+Props|type\s+\w+Props|export\s+(default\s+)?function|export\s+const -*.tsx - - ]]> - - - - Extract styling and theme usage - -src/components -styled\.|makeStyles|className=|sx=|css= - - ]]> - - - - Document accessibility features - -src/components -aria-|role=|tabIndex|alt=|htmlFor= - - ]]> - - - - Find usage examples and stories - -src -\.stories\.|\.story\.|examples?/|demo/ -*.tsx - - ]]> - - - - Create component library report - - - Component hierarchy and relationships - - Props documentation with types and defaults - - Styling system and customization options - - Accessibility compliance checklist - - Interactive examples and code snippets - - Best practices and anti-patterns - - Browser compatibility notes - - - - - - - - Use the most appropriate discovery method - - Start with directory exploration for well-organized codebases - Use pattern search for specific syntax or naming conventions - Apply file-based search when you know exact locations - Reserve semantic search for complex conceptual queries - - - - - Ensure complete UI/UX documentation - - Visual design and layout - Interactive elements and states - User feedback mechanisms - Accessibility features - Responsive behavior - Animation and transitions - Error states and recovery - Loading and progress indicators - - - - - Verify all aspects of documentation claims - - Technical accuracy of code examples - UI element descriptions match implementation - User workflows reflect actual behavior - Configuration values are current - Error messages match code - Performance claims are realistic - - - - \ No newline at end of file diff --git a/.roo/rules-docs-extractor/7_user_friendly_examples.xml b/.roo/rules-docs-extractor/7_user_friendly_examples.xml deleted file mode 100644 index 6b94e88de60..00000000000 --- 
a/.roo/rules-docs-extractor/7_user_friendly_examples.xml +++ /dev/null @@ -1,218 +0,0 @@ - - - Examples for creating user-focused, practical documentation. - - - - - The concurrent file read feature uses parallel processing. - Read multiple files at once, reducing interruptions. - - - - This improves efficiency. - Instead of approving 10 file reads one-by-one, approve them all at once. - - - - The feature uses a thread pool with configurable concurrency limits. - Roo reads up to 100 files at once (changeable in settings). - - - - Users must configure the concurrent file read limit parameter. - Adjust how many files Roo reads at once in settings. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The system imposes a hard limit of 100 concurrent operations. - Roo handles up to 100 files at once. - - - - Error: Maximum concurrency threshold exceeded. - Too many files requested. Lower the file limit in settings. - - - - Reduces API call overhead through request batching. - Get answers faster by reading all needed files at once. - - - - - - Error: ⚠️ - Tip: 💡 - Note: 📝 - Security: 🔒 - - - - For emphasis - For settings, file paths, or commands - For callouts or warnings - - - - - - Concurrent File Reads Doc - - - - - Does it start with benefits? - Are technical terms avoided? - Is the tone direct? - Are there practical examples? - Are sections short and scannable? - Does it answer user questions? - Is help accessible? 
- - \ No newline at end of file diff --git a/.roo/rules-issue-fixer/1_Workflow.xml b/.roo/rules-issue-fixer/1_Workflow.xml index 91682de6588..c6f2b570a08 100644 --- a/.roo/rules-issue-fixer/1_Workflow.xml +++ b/.roo/rules-issue-fixer/1_Workflow.xml @@ -17,7 +17,7 @@ Then retrieve the issue: - gh issue view [issue-number] --repo [owner]/[repo] --json number,title,body,state,labels,assignees,milestone,createdAt,updatedAt,closedAt,author + gh api repos/[owner]/[repo]/issues/[issue-number] --jq '{number,title,body,state,labels,assignees,milestone,createdAt:.created_at,updatedAt:.updated_at,closedAt:.closed_at,author:.user.login}' If the command fails with an authentication error (e.g., "gh: Not authenticated" or "HTTP 401"), ask the user to authenticate: @@ -49,7 +49,7 @@ - Any decisions or changes to requirements - gh issue view [issue number] --repo [owner]/[repo] --comments + gh api repos/[owner]/[repo]/issues/[issue-number]/comments --paginate --jq '.[].body' Also check for: @@ -191,7 +191,6 @@ Use appropriate tools: - apply_diff for targeted changes - write_to_file for new files - - search_and_replace for systematic updates After each significant change, run relevant tests: - execute_command to run test suites @@ -273,6 +272,13 @@ - [Note any technical terms or constraints] Please ensure all translations maintain consistency with existing terminology and follow the project's localization guidelines. + +[ ] Identify all user-facing strings that need translation +[ ] Update i18n JSON files for all supported languages +[ ] Translate any markdown documentation files +[ ] Verify translations maintain consistency with existing terminology +[ ] Test translations in the application context + Wait for the translation task to complete before proceeding to testing. 
diff --git a/.roo/rules-issue-fixer/2_best_practices.xml b/.roo/rules-issue-fixer/2_best_practices.xml index dede40a92f5..81a06fe94a6 100644 --- a/.roo/rules-issue-fixer/2_best_practices.xml +++ b/.roo/rules-issue-fixer/2_best_practices.xml @@ -14,5 +14,6 @@ - Check for accessibility issues (for UI changes) - Delegate translation tasks to translate mode when implementing user-facing changes - Always check for hard-coded strings and internationalization needs + - When using new_task to delegate work, always include a comprehensive todos list - Wait for translation completion before proceeding to final testing \ No newline at end of file diff --git a/.roo/rules-issue-fixer/4_github_cli_usage.xml b/.roo/rules-issue-fixer/4_github_cli_usage.xml index e12fb06a5b4..b4fa19b3cc4 100644 --- a/.roo/rules-issue-fixer/4_github_cli_usage.xml +++ b/.roo/rules-issue-fixer/4_github_cli_usage.xml @@ -29,23 +29,23 @@ - Retrieve the issue details at the start + Retrieve the issue details at the start using the REST Issues API. Always use first to get the full issue content - gh issue view [issue-number] --repo [owner]/[repo] --json number,title,body,state,labels,assignees,milestone,createdAt,updatedAt,closedAt,author + gh api repos/[owner]/[repo]/issues/[issue-number] --jq '{number,title,body,state,labels,assignees,milestone,createdAt:.created_at,updatedAt:.updated_at,closedAt:.closed_at,author:.user.login}' - gh issue view 123 --repo octocat/hello-world --json number,title,body,state,labels,assignees,milestone,createdAt,updatedAt,closedAt,author + gh api repos/octocat/hello-world/issues/123 --jq '{number,title,body,state,labels,assignees,milestone,createdAt:.created_at,updatedAt:.updated_at,closedAt:.closed_at,author:.user.login}' - Get additional context and requirements from issue comments + Get additional context and requirements from issue comments. 
Always use after viewing issue to see full discussion - gh issue view [issue-number] --repo [owner]/[repo] --comments + gh api repos/[owner]/[repo]/issues/[issue-number]/comments --paginate --jq '.[].body' - gh issue view 123 --repo octocat/hello-world --comments + gh api repos/octocat/hello-world/issues/123/comments --paginate --jq '.[].body' @@ -109,6 +109,30 @@ + + + Inspect associations with GitHub Projects (new Projects experience) for a given issue + Use when project context is relevant to understanding priority, ownership, or workflow + gh api graphql -f query=' +query($owner:String!, $repo:String!, $number:Int!) { + repository(owner:$owner, name:$repo) { + issue(number:$number) { + projectsV2(first:20) { + nodes { + title + url + } + } + } + } +} +' -F owner=[owner] -F repo=[repo] -F number=[issue-number] + + This uses the projectsV2 field from the new GitHub Projects experience for issue-level project context. + + + + Create a pull request diff --git a/.roo/rules-merge-resolver/1_workflow.xml b/.roo/rules-merge-resolver/1_workflow.xml index a63809db70e..2f0d1162f64 100644 --- a/.roo/rules-merge-resolver/1_workflow.xml +++ b/.roo/rules-merge-resolver/1_workflow.xml @@ -30,12 +30,13 @@ gh pr checkout [PR_NUMBER] --force git fetch origin main - git rebase origin/main + GIT_EDITOR=true git rebase origin/main
Force checkout the PR branch to ensure clean state Fetch the latest main branch Attempt to rebase onto main to reveal conflicts + Use GIT_EDITOR=true to ensure non-interactive rebase
@@ -108,8 +109,8 @@
- git rebase origin/main - Rebase current branch onto main to reveal conflicts + GIT_EDITOR=true git rebase origin/main + Rebase current branch onto main to reveal conflicts (non-interactive) @@ -133,6 +134,20 @@ + + GIT_EDITOR=true git rebase --continue + Continue rebase after resolving conflicts (non-interactive) + + + + + + true + Set to 'true' (a no-op command) to prevent interactive prompts during rebase operations + Prefix git rebase commands with GIT_EDITOR=true to ensure non-interactive execution + + + All merge conflicts have been resolved Resolved files have been staged diff --git a/.roo/rules-merge-resolver/2_best_practices.xml b/.roo/rules-merge-resolver/2_best_practices.xml index 5bf1b393eb2..ea6c7cc6a52 100644 --- a/.roo/rules-merge-resolver/2_best_practices.xml +++ b/.roo/rules-merge-resolver/2_best_practices.xml @@ -33,7 +33,7 @@ Escape Conflict Markers - When using apply_diff or search_and_replace tools, always escape merge + When using apply_diff, always escape merge conflict markers with backslashes to prevent parsing errors. 
- apply_diff or search_and_replace + apply_diff To resolve conflicts by replacing conflicted sections Precise editing of specific conflict blocks @@ -26,6 +26,8 @@ Chain git commands with && for efficiency Use --format options for structured output Capture command output for parsing + Use GIT_EDITOR=true for non-interactive git rebase operations + Set environment variables inline to avoid prompts during automation @@ -46,7 +48,7 @@ Rebase onto main to reveal conflicts - git rebase origin/main + GIT_EDITOR=true git rebase origin/main @@ -71,7 +73,7 @@ Continue rebase after resolution - git rebase --continue + GIT_EDITOR=true git rebase --continue @@ -125,25 +127,6 @@ function mergedImplementation() { ]]> - - - Use for simple conflict resolutions - Enable regex mode for complex patterns - Always escape special characters - - - -src/config.ts -\<<<<<<< HEAD[\s\S]*?\>>>>>>> \w+ -// Resolved configuration -const config = { - // Merged settings from both branches -} -true - - ]]> - @@ -152,7 +135,7 @@ const config = { execute_command - Get PR info with gh CLI execute_command - Checkout PR with gh pr checkout --force execute_command - Fetch origin main - execute_command - Rebase onto origin/main + execute_command - Rebase onto origin/main with GIT_EDITOR=true execute_command - Check for conflicts with git status @@ -178,13 +161,22 @@ const config = { execute_command - Check all conflicts resolved - execute_command - Continue rebase with git rebase --continue + execute_command - Continue rebase with GIT_EDITOR=true git rebase --continue execute_command - Verify clean status + + Git commands waiting for interactive input + + Use GIT_EDITOR=true to bypass editor prompts + Set GIT_SEQUENCE_EDITOR=true for sequence editing + Consider --no-edit flag for commit operations + + + Rebase completes without conflicts @@ -204,7 +196,7 @@ const config = { Conflict markers are incomplete or nested - Use search_and_replace with careful regex patterns + Use apply_diff with precise search 
blocks; split into multiple targeted edits if needed Manual inspection may be required @@ -221,8 +213,46 @@ const config = { Code contains literal conflict marker strings Extra careful escaping in diffs - Consider using search_and_replace with precise patterns + Prefer apply_diff with precise search blocks + + + + Ensuring git operations run without requiring user interaction is critical + for automated conflict resolution. The mode uses environment variables to + bypass interactive prompts. + + + + + Set to 'true' (a no-op command) to skip editor prompts + GIT_EDITOR=true git rebase --continue + During rebase operations that would normally open an editor + + + + Skip interactive rebase todo editing + GIT_SEQUENCE_EDITOR=true git rebase -i HEAD~3 + When interactive rebase is triggered but no editing needed + + + + Use flags to avoid interactive prompts + + git commit --no-edit (use existing message) + git merge --no-edit (skip merge message editing) + git cherry-pick --no-edit (keep original message) + + + + + + Always test commands locally first to identify potential prompts + Combine environment variables when multiple editors might be invoked + Document why non-interactive mode is used in comments + Have fallback strategies if automation fails + + \ No newline at end of file diff --git a/.roo/rules-merge-resolver/4_complete_example.xml b/.roo/rules-merge-resolver/4_complete_example.xml index dae85879971..32b2bf344bc 100644 --- a/.roo/rules-merge-resolver/4_complete_example.xml +++ b/.roo/rules-merge-resolver/4_complete_example.xml @@ -54,7 +54,7 @@ From github.com:user/repo -git rebase origin/main +GIT_EDITOR=true git rebase origin/main ]]> -git rebase --continue +GIT_EDITOR=true git rebase --continue ]]> Use git blame and commit messages to understand the history Combine non-conflicting improvements when possible Prioritize bugfixes while accommodating refactors - Complete the rebase process with git rebase --continue + Use GIT_EDITOR=true to ensure 
non-interactive rebase operations + Complete the rebase process with GIT_EDITOR=true git rebase --continue Validate that both sets of changes work together \ No newline at end of file diff --git a/.roo/rules-pr-reviewer/1_workflow.xml b/.roo/rules-pr-reviewer/1_workflow.xml deleted file mode 100644 index 444694a5209..00000000000 --- a/.roo/rules-pr-reviewer/1_workflow.xml +++ /dev/null @@ -1,493 +0,0 @@ - - - Initialize Review Process - - Create a todo list to track the PR review workflow: - - - - [ ] Fetch pull request information - [ ] Fetch associated issue (if any) - [ ] Fetch pull request diff - [ ] Fetch existing PR comments and reviews - [ ] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - This helps track progress through the review process and ensures all steps are completed. - - - - - Fetch Pull Request Information - - If the user provides a PR number or URL, extract the necessary information: - - Repository owner and name - - Pull request number - - Use the GitHub CLI to fetch the PR details: - - - gh pr view [PR_NUMBER] --repo [owner]/[repo] --json number,title,body,author,state,url,headRefName,baseRefName,headRefOid,mergeable,isDraft,createdAt,updatedAt - - - Parse the JSON output to understand the PR's current state and metadata. - IMPORTANT: Save the headRefOid value as it will be needed for submitting the review via the API. 
- - - - [x] Fetch pull request information - [ ] Fetch associated issue (if any) - [ ] Fetch pull request diff - [ ] Fetch existing PR comments and reviews - [ ] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Fetch Associated Issue (If Any) - - Check the pull request body for a reference to a GitHub issue (e.g., "Fixes #123", "Closes #456"). - If an issue is referenced, use the GitHub CLI to fetch its details: - - - gh issue view [issue_number] --repo [owner]/[repo] --json number,title,body,author,state,url,createdAt,updatedAt,comments - - - The issue description and comments can provide valuable context for the review. - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [ ] Fetch pull request diff - [ ] Fetch existing PR comments and reviews - [ ] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Fetch Pull Request Diff - - Get the pull request diff to understand the changes: - - - gh pr diff [PR_NUMBER] --repo [owner]/[repo] - - - This will show the complete diff of all changes in the PR. 
- - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [ ] Fetch existing PR comments and reviews - [ ] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Fetch Existing PR Comments and Reviews - - IMPORTANT: Before reviewing any code, first get all existing comments and reviews to understand what feedback has already been provided: - - Fetch all review comments: - - gh pr view [PR_NUMBER] --repo [owner]/[repo] --comments - - - Also fetch review details: - - gh api repos/[owner]/[repo]/pulls/[PR_NUMBER]/reviews - - - Create a mental or written list of: - - All issues/suggestions that have been raised - - The specific files and line numbers mentioned - - Whether comments appear to be resolved or still pending - - This information will guide your review to avoid duplicate feedback. 
- - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [ ] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Check Out Pull Request Locally - - Use the GitHub CLI to check out the pull request locally: - - - gh pr checkout [PR_NUMBER] --repo [owner]/[repo] - - - This allows you to: - - Navigate the actual code structure - - Understand how changes interact with existing code - - Get better context for your review - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [ ] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Verify Existing Comments Against Current Code - - Now that you have the code checked out locally and know what comments exist: - - 1. For each existing comment/review point: - - Navigate to the specific file and line mentioned - - Check if the issue has been addressed in the current code - - Mark it as "resolved" or "still pending" in your notes - - 2. Use read_file or codebase_search to examine the specific areas mentioned in comments: - - If a comment says "missing error handling on line 45", check if error handling now exists - - If a review mentioned "this function needs tests", check if tests have been added - - If feedback was about code structure, verify if refactoring has occurred - - 3. 
Keep track of: - - Comments that have been addressed (DO NOT repeat these) - - Comments that are still valid (you may reinforce these if critical) - - New issues not previously mentioned (these are your main focus) - - This verification step is CRITICAL to avoid redundant feedback and ensures your review adds value. - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [x] Verify existing comments against current code - [ ] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Perform Comprehensive Review - - Review the pull request thoroughly: - - Verify that the changes are directly related to the linked issue and do not include unrelated modifications. - - Focus primarily on the changes made in the PR. - - Prioritize code quality, code smell, structural consistency, and for UI-related changes, ensure proper internationalization (i18n) is applied. - - Watch for signs of technical debt (e.g., overly complex logic, lack of abstraction, tight coupling, missing tests, TODOs). - - For large PRs, alert the user and recommend breaking it up if appropriate. - - NEVER run tests or execute code in PR Reviewer mode. The repository likely has automated testing. Your role is limited to: - - Code review and analysis - - Leaving review comments - - Checking code quality and structure - - Reviewing test coverage and quality (without execution) - - Document your findings in a numbered list format: - 1. Code quality issues - 2. Structural improvements - 3. Missing tests or documentation - 4. Potential bugs or edge cases - 5. Performance concerns - 6. Security considerations - 7. Internationalization (i18n) issues - 8. 
Technical debt indicators - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [x] Verify existing comments against current code - [x] Perform comprehensive review - [ ] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Prepare Review Comments - - Format your review comments following these guidelines: - - CRITICAL: Before adding any comment, verify it's not already addressed: - - Cross-reference with your notes from Step 7 - - Only comment on NEW issues or UNRESOLVED existing issues - - Never repeat feedback that has been addressed in the current code - - Your suggestions should: - - Use a **friendly, curious tone** — prefer asking: "Is this intentional?" or "Could we approach this differently to improve X?" - - Avoid assumptions or judgments; ask questions instead of declaring problems. - - Skip ALL praise and positive comments. Focus exclusively on issues that need attention. - - Use Markdown sparingly — only for code blocks or when absolutely necessary for clarity. Avoid markdown headings (###, ##, etc.) entirely. - - Avoid including internal evaluation terminology (e.g., scores or internal tags) in public comments. - - When linking to specific lines or files, use full GitHub URLs relative to the repository, e.g. - `https://github.com/[owner]/[repo]/blob/[branch]/[path/to/file]#L[line-number]`. - - Present your findings as a numbered list organized by priority: - - **Critical Issues (Must Fix):** - 1. [Issue description with file/line reference] - 2. [Issue description with file/line reference] - - **Important Suggestions (Should Consider):** - 3. [Suggestion with rationale] - 4. [Suggestion with rationale] - - **Minor Improvements (Nice to Have):** - 5. [Improvement suggestion] - 6. 
[Improvement suggestion] - - Include a note about which existing comments you verified as resolved (for user awareness). - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [x] Verify existing comments against current code - [x] Perform comprehensive review - [x] Prepare review comments - [ ] Preview review with user - [ ] Submit review or create tasks - - - - - - - Preview Review with User - - Always show the user a preview of your review suggestions and comments before taking any action. - Present your findings as a numbered list clearly for the user before submitting comments. - - - I've completed my review of PR #[number]. Here's what I found: - - [If applicable: **Existing comments that have been resolved:** - - Comment about X on file Y - now addressed - - Suggestion about Z - implemented] - - **Review Findings:** - - **Critical Issues (Must Fix):** - 1. [Specific issue with file/line reference] - 2. [Specific issue with file/line reference] - - **Important Suggestions (Should Consider):** - 3. [Suggestion with rationale] - 4. [Suggestion with rationale] - - **Minor Improvements (Nice to Have):** - 5. [Improvement suggestion] - 6. 
[Improvement suggestion] - - Would you like me to: - - Create a comprehensive review with all comments - Create individual tasks for each suggestion using new_task - Let me modify the suggestions first - Skip submission - just wanted the analysis - - - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [x] Verify existing comments against current code - [x] Perform comprehensive review - [x] Prepare review comments - [x] Preview review with user - [ ] Submit review or create tasks - - - - - - - Submit Review - - Based on user preference, submit the review using the GitHub API to support inline comments: - - 1. Construct the review payload with inline comments. For each comment, you need: - - The file path (relative to repository root) - - The line number where the comment should appear - - The comment body text - - The side ("RIGHT" for new code, "LEFT" for old code) - - 2. Submit the review using the GitHub API with heredoc syntax: - - gh api -X POST repos/[owner]/[repo]/pulls/[PR_NUMBER]/reviews --input - < - - - The review will be created with all inline comments attached to specific lines of code. - - Example for a review: - - gh api -X POST repos/RooCodeInc/Roo-Code/pulls/6378/reviews --input - < - - - - - [x] Fetch pull request information - [x] Fetch associated issue (if any) - [x] Fetch pull request diff - [x] Fetch existing PR comments and reviews - [x] Check out pull request locally - [x] Verify existing comments against current code - [x] Perform comprehensive review - [x] Prepare review comments - [x] Preview review with user - [x] Submit review or create tasks - - - - - - - Create Tasks for Suggestions (Optional) - - If the user chooses to create individual tasks for each suggestion, use the new_task tool to create separate tasks: - - For each numbered finding from your review: - 1. 
Determine the appropriate mode based on the type of work needed: - - Use "code" mode for bug fixes, implementation changes, or refactoring - - Use "translate" mode for internationalization (i18n) issues - - Use "test" mode for missing or inadequate test coverage - - Use "docs-extractor" mode for documentation issues - - Use "architect" mode for structural or design improvements - - Use "debug" mode for investigating potential bugs - - 2. Create a clear, actionable task message that includes: - - The specific issue or suggestion - - The file(s) and line numbers affected - - Any relevant context from the PR - - The expected outcome - - 3. Use the new_task tool for each suggestion: - - [appropriate mode based on task type] - Fix [issue type] in [file]: [specific description of what needs to be done] - - - Example task creation: - - code - Fix missing error handling in src/api/users.ts:45-52. The getUserById function should handle cases where the user is not found and return an appropriate error response. - - - - translate - Add missing i18n translations for new user profile fields in src/components/UserProfile.tsx. The fields 'bio', 'location', and 'website' need to be wrapped with translation functions. - - - After creating all tasks, provide a summary: - "I've created [X] individual tasks for the review findings: - - [Y] code fixes/improvements - - [Z] translation/i18n tasks - - [etc.] - - Each task contains the specific context and requirements for addressing the issue." 
- - - \ No newline at end of file diff --git a/.roo/rules-pr-reviewer/2_best_practices.xml b/.roo/rules-pr-reviewer/2_best_practices.xml deleted file mode 100644 index f367f25b60e..00000000000 --- a/.roo/rules-pr-reviewer/2_best_practices.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - ALWAYS create a todo list at the start to track the review workflow (Step 1) - - Use GitHub CLI (`gh`) commands instead of MCP tools for all GitHub operations - - ALWAYS fetch the PR's headRefOid in Step 2 - this is required for API review submission - - ALWAYS fetch existing comments and reviews BEFORE reviewing any code (Step 5) - - Create a list of all existing feedback before starting your review - - Check out the PR locally using `gh pr checkout` for better context understanding - - Systematically verify each existing comment against the current code (Step 7) - - Track which comments are resolved vs still pending - - Only provide feedback on NEW issues or UNRESOLVED existing issues - - Never duplicate feedback that has already been addressed - - Always fetch and review the entire PR diff before commenting - - Check for and review any associated issue for context - - Focus on the changes made, not unrelated code - - Ensure all changes are directly related to the linked issue - - Use a friendly, curious tone in all comments - - Ask questions rather than making assumptions - there may be intentions behind the code choices - - Provide actionable feedback with specific suggestions - - Focus exclusively on issues and improvements - skip all praise or positive comments - - Use minimal markdown - avoid headings (###, ##) and excessive formatting - - Only use markdown for code blocks or when absolutely necessary for clarity - - Consider the PR's scope - suggest breaking up large PRs - - Verify proper i18n implementation for UI changes - - Check for test coverage without executing tests - - Look for signs of technical debt and code smells - - Ensure consistency with existing code patterns - - 
Link to specific lines using full GitHub URLs - - Present findings in a numbered list format for clarity - - Group feedback by priority (critical, important, minor) - - Always preview comments with the user before submitting - - Use the GitHub API for submitting reviews to support inline comments - - Construct proper JSON payloads with commit_id, body, event, and comments array - - Each inline comment needs: path, body, line number, and side (RIGHT for new code) - - Use COMMENT when submitting the review - - Use heredoc syntax (--input - < \ No newline at end of file diff --git a/.roo/rules-pr-reviewer/3_common_mistakes_to_avoid.xml b/.roo/rules-pr-reviewer/3_common_mistakes_to_avoid.xml deleted file mode 100644 index 0aee6e6099f..00000000000 --- a/.roo/rules-pr-reviewer/3_common_mistakes_to_avoid.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - Not creating a todo list at the start to track the review workflow - - Using MCP tools instead of GitHub CLI commands for GitHub operations - - Forgetting to fetch headRefOid in Step 2 - this is REQUIRED for API review submission - - Starting to review code WITHOUT first fetching existing comments and reviews - - Failing to create a list of existing feedback before reviewing - - Not systematically checking each existing comment against the current code - - Repeating feedback that has already been addressed in the current code - - Ignoring existing PR comments or failing to verify if they have already been resolved - - Running tests or executing code during review - - Making judgmental or harsh comments - - Providing feedback on code outside the PR's scope - - Overlooking unrelated changes not tied to the main issue - - Including ANY praise or positive comments - focus only on issues - - Using markdown headings (###, ##, #) in review comments - - Using excessive markdown formatting when plain text would suffice - - Submitting comments without user preview/approval - - Forgetting to check for an associated issue for additional 
context - - Missing critical security or performance issues - - Not checking for proper i18n in UI changes - - Failing to suggest breaking up large PRs - - Using internal evaluation terminology in public comments - - Not providing actionable suggestions for improvements - - Reviewing only the diff without local context - - Making assumptions instead of asking clarifying questions about potential intentions - - Forgetting to link to specific lines with full GitHub URLs - - Not presenting findings in a clear numbered list format - - Failing to offer the task creation option for addressing suggestions - - Creating tasks without specific context or file references - - Choosing inappropriate modes when creating tasks for suggestions - - Not updating the todo list after completing each step - - Not including --repo flag when using gh commands for non-default repositories - - Using wrong commit_id in review payload (must use headRefOid from PR info) - - Forgetting to specify "side": "RIGHT" for comments on new code - - Using incorrect line numbers that don't match the actual diff - - Not escaping special characters in JSON payload properly - - Not using COMMENT as the event type in the review payload - - Not constructing proper file paths relative to repository root - - Submitting empty comments array when inline comments are needed - - Forgetting to use < \ No newline at end of file diff --git a/.roo/rules-translate/instructions-zh-cn.md b/.roo/rules-translate/instructions-zh-cn.md index 241ae338dc1..b166a1e6a8d 100644 --- a/.roo/rules-translate/instructions-zh-cn.md +++ b/.roo/rules-translate/instructions-zh-cn.md @@ -16,7 +16,6 @@ | Auto-approve | 自动批准 | 始终批准 | 权限相关术语 | | Checkpoint | 存档点 | 检查点/快照 | 技术概念统一 | | MCP Server | MCP 服务 | MCP 服务器 | 技术组件 | -| Human Relay | 人工辅助模式 | 人工中继 | 功能描述清晰 | | Network Timeout | 请求超时 | 网络超时 | 更准确描述 | | Terminal | 终端 | 命令行 | 技术术语统一 | | diff | 差异更新 | 差分/补丁 | 代码变更 | @@ -115,7 +114,7 @@ - 保留英文品牌名 - 技术术语保持一致性 - - 保留英文专有名词:如"AWS Bedrock ARN" + 
- 保留英文专有名词:如"Amazon Bedrock ARN" 4. **用户操作** - 操作动词统一: diff --git a/.roomodes b/.roomodes index d027cec83fb..01f6ed45050 100644 --- a/.roomodes +++ b/.roomodes @@ -94,19 +94,19 @@ customModes: You are Roo, a documentation analysis specialist with two primary functions: 1. Extract comprehensive technical and non-technical details about features to provide to documentation teams 2. Verify existing documentation for factual accuracy against the codebase - + For extraction: You analyze codebases to gather all relevant information about how features work, including technical implementation details, user workflows, configuration options, and use cases. You organize this information clearly for documentation teams to use. - + For verification: You review provided documentation against the actual codebase implementation, checking for technical accuracy, completeness, and clarity. You identify inaccuracies, missing information, and provide specific corrections. - + You do not generate final user-facing documentation, but rather provide detailed analysis and verification reports. - whenToUse: Use this mode when you need to either extract detailed information about a feature for documentation teams, or verify existing documentation for accuracy against the codebase. + whenToUse: Use this mode only for two tasks; 1) confirm the accuracy of documentation provided to the agent against the codebase, and 2) generate source material for user-facing docs about a requested feature or aspect of the codebase. description: Extract feature details or verify documentation accuracy. 
groups: - read - - edit - - fileRegex: (DOCS-TEMP-.*\.md$|\.roo/docs-extractor/.*\.md$) - description: Temporary documentation extraction files only + - fileRegex: (EXTRACTION-.*\.md$|VERIFICATION-.*\.md$|DOCS-TEMP-.*\.md$|\.roo/docs-extractor/.*\.md$) + description: Extraction/Verification report files only (source-material), plus legacy DOCS-TEMP - command - mcp - slug: pr-fixer @@ -205,21 +205,6 @@ customModes: - command - mcp source: project - - slug: pr-reviewer - name: 🔍 PR Reviewer - roleDefinition: |- - You are Roo, a pull request reviewer specializing in code quality, structure, and translation consistency. Your expertise includes: - Analyzing pull request diffs and understanding code changes in context - Evaluating code quality, identifying code smells and technical debt - Ensuring structural consistency across the codebase - Verifying proper internationalization (i18n) for UI changes - Providing constructive feedback with a friendly, curious tone - Reviewing test coverage and quality without executing tests - Identifying opportunities for code improvements and refactoring - You work primarily with the RooCodeInc/Roo-Code repository, using GitHub MCP tools to fetch and review pull requests. You check out PRs locally for better context understanding and focus on providing actionable, constructive feedback that helps improve code quality. - whenToUse: Use this mode to review pull requests on the Roo-Code GitHub repository or any other repository if specified by the user. - description: Review PRs for code quality, structure, and i18n compliance. 
- groups: - - read - - - edit - - fileRegex: \.md$ - description: Markdown files only - - mcp - - command - source: project - slug: mode-writer name: ✍️ Mode Writer roleDefinition: |- diff --git a/CHANGELOG.md b/CHANGELOG.md index 312f5f290ef..d50b41699c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,1176 @@ # Roo Code Changelog +## [3.38.2] - 2025-12-31 + +![3.38.2 Release - Skill Alignment](/releases/3.38.2-release.png) + +- Align skills system with Agent Skills specification (PR #10409 by @hannesrudolph) +- Prevent write_to_file from creating files at truncated paths (PR #10415 by @mrubens and @daniel-lxs) +- Update Cerebras maxTokens to 16384 (PR #10387 by @sebastiand-cerebras) +- Fix rate limit wait display (PR #10389 by @hannesrudolph) +- Remove human-relay provider (PR #10388 by @hannesrudolph) +- Replace Todo Lists video with Context Management video in documentation (PR #10375 by @SannidhyaSah) + +## [3.38.1] - 2025-12-29 + +![3.38.1 Release - Bug Fixes and Stability](/releases/3.38.1-release.png) + +- Fix: Flush pending tool results before condensing context (PR #10379 by @daniel-lxs) +- Fix: Revert mergeToolResultText for OpenAI-compatible providers (PR #10381 by @hannesrudolph) +- Fix: Enforce maxConcurrentFileReads limit in read_file tool (PR #10363 by @roomote) +- Fix: Improve feedback message when read_file is used on a directory (PR #10371 by @roomote) +- Fix: Handle custom tool use similarly to MCP tools for IPC schema purposes (PR #10364 by @jr) +- Fix: Correct GitHub repository URL in marketing page (#10376 by @jishnuteegala, PR #10377 by @roomote) +- Docs: Clarify path to Security Settings in privacy policy (PR #10367 by @roomote) + +## [3.38.0] - 2025-12-27 + +![3.38.0 Release - Skills](/releases/3.38.0-release.png) + +- Add support for [Agent Skills](https://agentskills.io/), enabling reusable packages of prompts, tools, and resources to extend Roo's capabilities (PR #10335 by @mrubens) +- Add optional mode field to slash command 
front matter, allowing commands to automatically switch to a specific mode when triggered (PR #10344 by @app/roomote) +- Add support for npm packages and .env files to custom tools, allowing custom tools to import dependencies and access environment variables (PR #10336 by @cte) +- Remove simpleReadFileTool feature, streamlining the file reading experience (PR #10254 by @app/roomote) +- Remove OpenRouter Transforms feature (PR #10341 by @app/roomote) +- Fix mergeToolResultText handling in Roo provider (PR #10359 by @mrubens) + +## [3.37.1] - 2025-12-23 + +![3.37.1 Release - Tool Fixes and Provider Improvements](/releases/3.37.1-release.png) + +- Fix: Send native tool definitions by default for OpenAI to ensure proper tool usage (PR #10314 by @hannesrudolph) +- Fix: Preserve reasoning_details shape to prevent malformed responses when processing model output (PR #10313 by @hannesrudolph) +- Fix: Drain queued messages while waiting for ask to prevent message loss (PR #10315 by @hannesrudolph) +- Feat: Add grace retry for empty assistant messages to improve reliability (PR #10297 by @hannesrudolph) +- Feat: Enable mergeToolResultText for all OpenAI-compatible providers for better tool result handling (PR #10299 by @hannesrudolph) +- Feat: Enable mergeToolResultText for Roo Code Cloud provider (PR #10301 by @hannesrudolph) +- Feat: Strengthen native tool-use guidance in prompts for improved model behavior (PR #10311 by @hannesrudolph) +- UX: Account-centric signup flow for improved onboarding experience (PR #10306 by @brunobergher) + +## [3.37.0] - 2025-12-22 + +![3.37.0 Release - Custom Tool Calling](/releases/3.37.0-release.png) + +- Add MiniMax M2.1 and improve environment_details handling for Minimax thinking models (PR #10284 by @hannesrudolph) +- Add GLM-4.7 model with thinking mode support for Zai provider (PR #10282 by @hannesrudolph) +- Add experimental custom tool calling - define custom tools that integrate seamlessly with your AI workflow (PR #10083 by @cte) 
+- Deprecate XML tool protocol selection and force native tool format for new tasks (PR #10281 by @daniel-lxs) +- Fix: Emit tool_call_end events in OpenAI handler when streaming ends (#10275 by @torxeon, PR #10280 by @daniel-lxs) +- Fix: Emit tool_call_end events in BaseOpenAiCompatibleProvider (PR #10293 by @hannesrudolph) +- Fix: Disable strict mode for MCP tools to preserve optional parameters (PR #10220 by @daniel-lxs) +- Fix: Move array-specific properties into anyOf variant in normalizeToolSchema (PR #10276 by @daniel-lxs) +- Fix: Add CRLF line ending normalization to search_replace and search_and_replace tools (PR #10288 by @hannesrudolph) +- Fix: Add graceful fallback for model parsing in Chutes provider (PR #10279 by @hannesrudolph) +- Fix: Enable Requesty refresh models with credentials (PR #10273 by @daniel-lxs) +- Fix: Improve reasoning_details accumulation and serialization (PR #10285 by @hannesrudolph) +- Fix: Preserve reasoning_content in condense summary for DeepSeek-reasoner (PR #10292 by @hannesrudolph) +- Refactor Zai provider to merge environment_details into tool result instead of system message (PR #10289 by @hannesrudolph) +- Remove parallel_tool_calls parameter from litellm provider (PR #10274 by @roomote) +- Add Cloud Team page with comprehensive team management features (PR #10267 by @roomote) +- Add message log deduper utility for evals (PR #10286 by @hannesrudolph) + +## [3.36.16] - 2025-12-19 + +- Fix: Normalize tool schemas for VS Code LM API to resolve error 400 when using VS Code Language Model API providers (PR #10221 by @hannesrudolph) + +## [3.36.15] - 2025-12-19 + +![3.36.15 Release - 1M Context Window Support](/releases/3.36.15-release.png) + +- Add 1M context window beta support for Claude Sonnet 4 on Vertex AI, enabling significantly larger context for complex tasks (PR #10209 by @hannesrudolph) +- Add native tool calling support for LM Studio and Qwen-Code providers, improving compatibility with local models (PR #10208 by 
@hannesrudolph) +- Add native tool call defaults for OpenAI-compatible providers, expanding native function calling across more configurations (PR #10213 by @hannesrudolph) +- Enable native tool calls for Requesty provider (PR #10211 by @daniel-lxs) +- Improve API error handling and visibility with clearer error messages and better user feedback (PR #10204 by @brunobergher) +- Add downloadable error diagnostics from chat errors, making it easier to troubleshoot and report issues (PR #10188 by @brunobergher) +- Fix refresh models button not properly flushing the cache, ensuring model lists update correctly (#9682 by @tl-hbk, PR #9870 by @pdecat) +- Fix additionalProperties handling for strict mode compatibility, resolving schema validation issues with certain providers (PR #10210 by @daniel-lxs) + +## [3.36.14] - 2025-12-18 + +![3.36.14 Release - Native Tool Calling for Claude on Vertex AI](/releases/3.36.14-release.png) + +- Add native tool calling support for Claude models on Vertex AI, enabling more efficient and reliable tool interactions (PR #10197 by @hannesrudolph) +- Fix JSON Schema format value stripping for OpenAI compatibility, resolving issues with unsupported format values (PR #10198 by @daniel-lxs) +- Improve "no tools used" error handling with graceful retry mechanism for better reliability when tools fail to execute (PR #10196 by @hannesrudolph) + +## [3.36.13] - 2025-12-18 + +![3.36.13 Release - Native Tool Protocol](/releases/3.36.13-release.png) + +- Change default tool protocol from XML to native for improved reliability and performance (PR #10186 by @mrubens) +- Add native tool support for VS Code Language Model API providers (PR #10191 by @daniel-lxs) +- Lock task tool protocol for consistent task resumption, ensuring tasks resume with the same protocol they started with (PR #10192 by @daniel-lxs) +- Replace edit_file tool alias with actual edit_file tool for improved diff editing capabilities (PR #9983 by @hannesrudolph) +- Fix LiteLLM router 
models by merging default model info for native tool calling support (PR #10187 by @daniel-lxs) +- Add PostHog exception tracking for consecutive mistake errors to improve error monitoring (PR #10193 by @daniel-lxs) + +## [3.36.12] - 2025-12-18 + +![3.36.12 Release - Better telemetry and Bedrock fixes](/releases/3.36.12-release.png) + +- Fix: Add userAgentAppId to Bedrock embedder for code indexing (#10165 by @jackrein, PR #10166 by @roomote) +- Update OpenAI and Gemini tool preferences for improved model behavior (PR #10170 by @hannesrudolph) +- Extract error messages from JSON payloads for better PostHog error grouping (PR #10163 by @daniel-lxs) + +## [3.36.11] - 2025-12-17 + +![3.36.11 Release - Native Tool Calling Enhancements](/releases/3.36.11-release.png) + +- Add support for Claude Code Provider native tool calling, improving tool execution performance and reliability (PR #10077 by @hannesrudolph) +- Enable native tool calling by default for Z.ai models for better model compatibility (PR #10158 by @app/roomote) +- Enable native tools by default for OpenAI compatible provider to improve tool calling support (PR #10159 by @daniel-lxs) +- Fix: Normalize MCP tool schemas for Bedrock and OpenAI strict mode to ensure proper tool compatibility (PR #10148 by @daniel-lxs) +- Fix: Remove dots and colons from MCP tool names for Bedrock compatibility (PR #10152 by @daniel-lxs) +- Fix: Convert tool_result to XML text when native tools disabled for Bedrock (PR #10155 by @daniel-lxs) +- Fix: Refresh Roo models cache with session token on auth state change to resolve model list refresh issues (PR #10156 by @daniel-lxs) +- Fix: Support AWS GovCloud and China region ARNs in Bedrock provider for expanded regional support (PR #10157 by @app/roomote) + +## [3.36.10] - 2025-12-17 + +![3.36.10 Release - Gemini 3 Flash Preview](/releases/3.36.10-release.png) + +- Add support for Gemini 3 Flash Preview model in the Gemini provider (PR #10151 by @hannesrudolph) +- Implement 
interleaved thinking mode for DeepSeek Reasoner, enabling streaming reasoning output (PR #9969 by @hannesrudolph) +- Fix: Preserve reasoning_content during tool call sequences in DeepSeek (PR #10141 by @hannesrudolph) +- Fix: Correct token counting for context truncation display (PR #9961 by @hannesrudolph) +- Update Next.js dependency to ~15.2.8 (PR #10140 by @jr) + +## [3.36.9] - 2025-12-15 + +![3.36.9 Release - Cross-Provider Compatibility](/releases/3.36.9-release.png) + +- Fix: Normalize tool call IDs for cross-provider compatibility via OpenRouter, ensuring consistent handling across different AI providers (PR #10102 by @daniel-lxs) +- Fix: Add additionalProperties: false to nested MCP tool schemas, improving schema validation and preventing unexpected properties (PR #10109 by @daniel-lxs) +- Fix: Validate tool_result IDs in delegation resume flow, preventing errors when resuming delegated tasks (PR #10135 by @daniel-lxs) +- Feat: Add full error details to streaming failure dialog, providing more comprehensive information for debugging streaming issues (PR #10131 by @roomote) +- Feat: Improve evals UI with tool groups and duration fix, enhancing the evaluation interface organization and timing accuracy (PR #10133 by @hannesrudolph) + +## [3.36.8] - 2025-12-16 + +![3.36.8 Release - Native Tools Enabled by Default](/releases/3.36.8-release.png) + +- Implement incremental token-budgeted file reading for smarter, more efficient file content retrieval (PR #10052 by @jr) +- Enable native tools by default for multiple providers including OpenAI, Azure, Google, Vertex, and more (PR #10059 by @daniel-lxs) +- Enable native tools by default for Anthropic and add telemetry tracking for tool format usage (PR #10021 by @daniel-lxs) +- Fix: Prevent race condition from deleting wrong API messages during streaming (PR #10113 by @hannesrudolph) +- Fix: Prevent duplicate MCP tools error by deduplicating servers at source (PR #10096 by @daniel-lxs) +- Remove strict ARN 
validation for Bedrock custom ARN users, allowing more flexibility (#10108 by @wisestmumbler, PR #10110 by @roomote)
support team visibility controls (PR #10020 by @roomote) +- Fix: Extract raw error message from OpenRouter metadata for clearer error reporting (PR #10039 by @daniel-lxs) +- Fix: Show tool protocol dropdown for LiteLLM provider, restoring missing configuration option (PR #10053 by @daniel-lxs) + +## [3.36.5] - 2025-12-11 + +![3.36.5 Release - GPT-5.2](/releases/3.36.5-release.png) + +- Add: GPT-5.2 model to openai-native provider (PR #10024 by @hannesrudolph) +- Add: Toggle for Enter key behavior in chat input allowing users to configure whether Enter sends or creates new line (#8555 by @lmtr0, PR #10002 by @hannesrudolph) +- Add: App version to telemetry exception captures and filter 402 errors (PR #9996 by @daniel-lxs) +- Fix: Handle empty Gemini responses and reasoning loops to prevent infinite retries (PR #10007 by @hannesrudolph) +- Fix: Add missing tool_result blocks to prevent API errors when tool results are expected (PR #10015 by @daniel-lxs) +- Fix: Filter orphaned tool_results when more results than tool_uses to prevent message validation errors (PR #10027 by @daniel-lxs) +- Fix: Add general API endpoints for Z.ai provider (#9879 by @richtong, PR #9894 by @roomote) +- Fix: Apply versioned settings on nightly builds (PR #9997 by @hannesrudolph) +- Remove: Glama provider (PR #9801 by @hannesrudolph) +- Remove: Deprecated list_code_definition_names tool (PR #10005 by @hannesrudolph) + +## [3.36.4] - 2025-12-10 + +![3.36.4 Release - Error Details Modal](/releases/3.36.4-release.png) + +- Add error details modal with on-demand display for improved error visibility when debugging issues (PR #9985 by @roomote) +- Fix: Prevent premature rawChunkTracker clearing for MCP tools, improving reliability of MCP tool streaming (PR #9993 by @daniel-lxs) +- Fix: Filter out 429 rate limit errors from API error telemetry for cleaner metrics (PR #9987 by @daniel-lxs) +- Fix: Correct TODO list display order in chat view to show items in proper sequence (PR #9991 by @roomote) 
+ +## [3.36.3] - 2025-12-09 + +![3.36.3 Release](/releases/3.36.3-release.png) + +- Refactor: Unified context-management architecture with improved UX for better context control (PR #9795 by @hannesrudolph) +- Add new `search_replace` native tool for single-replacement operations with improved editing precision (PR #9918 by @hannesrudolph) +- Streaming tool stats and token usage throttling for better real-time feedback during generation (PR #9926 by @hannesrudolph) +- Add versioned settings support with minPluginVersion gating for Roo provider (PR #9934 by @hannesrudolph) +- Make Architect mode save plans to `/plans` directory and gitignore it (PR #9944 by @brunobergher) +- Add announcement support CTA and social icons to UI (PR #9945 by @hannesrudolph) +- Add ability to save screenshots from the browser tool (PR #9963 by @mrubens) +- Refactor: Decouple tools from system prompt for cleaner architecture (PR #9784 by @daniel-lxs) +- Update DeepSeek models to V3.2 with new pricing (PR #9962 by @hannesrudolph) +- Add minimal and medium reasoning effort levels for Gemini models (PR #9973 by @hannesrudolph) +- Update xAI models catalog with latest model options (PR #9872 by @hannesrudolph) +- Add DeepSeek V3-2 support for Baseten provider (PR #9861 by @AlexKer) +- Tweaks to Baseten model definitions for better defaults (PR #9866 by @mrubens) +- Fix: Add xhigh reasoning effort support for gpt-5.1-codex-max (#9891 by @andrewginns, PR #9900 by @andrewginns) +- Fix: Add Kimi, MiniMax, and Qwen model configurations for Bedrock (#9902 by @jbearak, PR #9905 by @app/roomote) +- Configure tool preferences for xAI models (PR #9923 by @hannesrudolph) +- Default to using native tools when supported on OpenRouter (PR #9878 by @mrubens) +- Fix: Exclude apply_diff from native tools when diffEnabled is false (#9919 by @denis-kudelin, PR #9920 by @app/roomote) +- Fix: Always show tool protocol selector for openai-compatible provider (#9965 by @bozoweed, PR #9966 by @hannesrudolph) +- 
Fix: Respect explicit supportsReasoningEffort array values for proper model configuration (PR #9970 by @hannesrudolph) +- Add timeout configuration to OpenAI Compatible Provider Client (PR #9898 by @dcbartlett) +- Revert default tool protocol change from xml to native for stability (PR #9956 by @mrubens) +- Remove defaultTemperature from Roo provider configuration (PR #9932 by @mrubens) +- Improve OpenAI error messages to be more useful for debugging (PR #9639 by @mrubens) +- Better error logs for parseToolCall exceptions (PR #9857 by @cte) +- Improve cloud job error logging for RCC provider errors (PR #9924 by @cte) +- Fix: Display actual API error message instead of generic text on retry (PR #9954 by @hannesrudolph) +- Add API error telemetry to OpenRouter provider for better diagnostics (PR #9953 by @daniel-lxs) +- Fix: Sanitize removed/invalid API providers to prevent infinite loop (PR #9869 by @hannesrudolph) +- Fix: Use foreground color for context-management icons (PR #9912 by @hannesrudolph) +- Fix: Suppress 'ask promise was ignored' error in handleError (PR #9914 by @daniel-lxs) +- Fix: Process finish_reason to emit tool_call_end events properly (PR #9927 by @daniel-lxs) +- Fix: Add finish_reason processing to xai.ts provider (PR #9929 by @daniel-lxs) +- Fix: Validate and fix tool_result IDs before API requests (PR #9952 by @daniel-lxs) +- Fix: Return undefined instead of 0 for disabled API timeout (PR #9960 by @hannesrudolph) +- Stop making unnecessary count_tokens requests for better performance (PR #9884 by @mrubens) +- Refactor: Consolidate ThinkingBudget components and fix disable handling (PR #9930 by @hannesrudolph) +- Forbid time estimates in architect mode for more focused planning (PR #9931 by @app/roomote) +- Web: Add product pages (PR #9865 by @brunobergher) +- Make eval runs deleteable in the web UI (PR #9909 by @mrubens) +- Feat: Change defaultToolProtocol default from xml to native (later reverted) (PR #9892 by @app/roomote) + +## [3.36.2] - 
2025-12-04 + +![3.36.2 Release - Dynamic API Settings](/releases/3.36.2-release.png) + +- Restrict GPT-5 tool set to apply_patch for improved compatibility (PR #9853 by @hannesrudolph) +- Add dynamic settings support for Roo models from API, allowing model-specific configurations to be fetched dynamically (PR #9852 by @hannesrudolph) +- Fix: Resolve Chutes provider model fetching issue (PR #9854 by @cte) + +## [3.36.1] - 2025-12-04 + +![3.36.1 Release - Message Management & Stability Improvements](/releases/3.36.1-release.png) + +- Add MessageManager layer for centralized history coordination, fixing message synchronization issues (PR #9842 by @hannesrudolph) +- Fix: Prevent cascading truncation loop by only truncating visible messages (PR #9844 by @hannesrudolph) +- Fix: Handle unknown/invalid native tool calls to prevent extension freeze (PR #9834 by @daniel-lxs) +- Always enable reasoning for models that require it (PR #9836 by @cte) +- ChatView: Smoother stick-to-bottom behavior during streaming (PR #8999 by @hannesrudolph) +- UX: Improved error messages and documentation links (PR #9777 by @brunobergher) +- Fix: Overly round follow-up question suggestions styling (PR #9829 by @brunobergher) +- Add symlink support for slash commands in .roo/commands folder (PR #9838 by @mrubens) +- Ignore input to the execa terminal process for safer command execution (PR #9827 by @mrubens) +- Be safer about large file reads (PR #9843 by @jr) +- Add gpt-5.1-codex-max model to OpenAI provider (PR #9848 by @hannesrudolph) +- Evals UI: Add filtering, bulk delete, tool consolidation, and run notes (PR #9837 by @hannesrudolph) +- Evals UI: Add multi-model launch and UI improvements (PR #9845 by @hannesrudolph) +- Web: New pricing page (PR #9821 by @brunobergher) + +## [3.36.0] - 2025-12-04 + +![3.36.0 Release - Rewind Kangaroo](/releases/3.36.0-release.png) + +- Fix: Restore context when rewinding after condense (#8295 by @hannesrudolph, PR #9665 by @hannesrudolph) +- Add 
reasoning_details support to Roo provider for enhanced model reasoning visibility (PR #9796 by @app/roomote) +- Default to native tools for all models in the Roo provider for improved performance (PR #9811 by @mrubens) +- Enable search_and_replace for Minimax models (PR #9780 by @mrubens) +- Fix: Resolve Vercel AI Gateway model fetching issues (PR #9791 by @cte) +- Fix: Apply conservative max tokens for Cerebras provider (PR #9804 by @sebastiand-cerebras) +- Fix: Remove omission detection logic to eliminate false positives (#9785 by @Michaelzag, PR #9787 by @app/roomote) +- Refactor: Remove deprecated insert_content tool (PR #9751 by @daniel-lxs) +- Chore: Hide parallel tool calls experiment and disable feature (PR #9798 by @hannesrudolph) +- Update next.js documentation site dependencies (PR #9799 by @jr) +- Fix: Correct download count display on homepage (PR #9807 by @mrubens) + +## [3.35.5] - 2025-12-03 + +- Feat: Add provider routing selection for OpenRouter embeddings (#9144 by @SannidhyaSah, PR #9693 by @SannidhyaSah) +- Default Minimax M2 to native tool calling (PR #9778 by @mrubens) +- Sanitize the native tool calls to fix a bug with Gemini (PR #9769 by @mrubens) +- UX: Updates to CloudView (PR #9776 by @roomote) + +## [3.35.4] - 2025-12-02 + +- Fix: Handle malformed native tool calls to prevent hanging (PR #9758 by @daniel-lxs) +- Fix: Remove reasoning toggles for GLM-4.5 and GLM-4.6 on z.ai provider (PR #9752 by @roomote) +- Refactor: Remove line_count parameter from write_to_file tool (PR #9667 by @hannesrudolph) + +## [3.35.3] - 2025-12-02 + +- Switch to new welcome view for improved onboarding experience (PR #9741 by @mrubens) +- Update homepage with latest changes (PR #9675 by @brunobergher) +- Improve privacy for stealth models by adding vendor confidentiality section to system prompt (PR #9742 by @mrubens) + +## [3.35.2] - 2025-12-01 + +![3.35.2 Release - Model Default Temperatures](/releases/3.35.2-release.png) + +- Allow models to contain default 
temperature settings for provider-specific optimal defaults (PR #9734 by @mrubens) +- Add tag-based native tool calling detection for Roo provider models (PR #9735 by @mrubens) +- Enable native tool support for all LiteLLM models by default (PR #9736 by @mrubens) +- Pass app version to provider for improved request tracking (PR #9730 by @cte) + +## [3.35.1] - 2025-12-01 + +- Fix: Flush pending tool results before task delegation (PR #9726 by @daniel-lxs) +- Improve: Better IPC error logging for easier debugging (PR #9727 by @cte) + +## [3.35.0] - 2025-12-01 + +![3.35.0 Release - Subtasks & Native Tools](/releases/3.35.0-release.png) + +- Metadata-driven subtasks with automatic parent resume and single-open safety for improved task orchestration (#8081 by @hannesrudolph, PR #9090 by @hannesrudolph) +- Native tool calling support expanded across many providers: Bedrock (PR #9698 by @mrubens), Cerebras (PR #9692 by @mrubens), Chutes with auto-detection from API (PR #9715 by @daniel-lxs), DeepInfra (PR #9691 by @mrubens), DeepSeek and Doubao (PR #9671 by @daniel-lxs), Groq (PR #9673 by @daniel-lxs), LiteLLM (PR #9719 by @daniel-lxs), Ollama (PR #9696 by @mrubens), OpenAI-compatible providers (PR #9676 by @daniel-lxs), Requesty (PR #9672 by @daniel-lxs), Unbound (PR #9699 by @mrubens), Vercel AI Gateway (PR #9697 by @mrubens), Vertex Gemini (PR #9678 by @daniel-lxs), and xAI with new Grok 4 Fast and Grok 4.1 Fast models (PR #9690 by @mrubens) +- Fix: Preserve tool_use blocks in summary for parallel tool calls (#9700 by @SilentFlower, PR #9714 by @SilentFlower) +- Default Grok Code Fast to native tools for better performance (PR #9717 by @mrubens) +- UX improvements to the Roo Code Cloud provider-centric onboarding flow (PR #9709 by @brunobergher) +- UX toolbar cleanup and settings consolidation for a cleaner interface (PR #9710 by @brunobergher) +- Add model-specific tool customization via `excludedTools` and `includedTools` configuration (PR #9641 by @daniel-lxs) +- 
Add new `apply_patch` native tool for more efficient file editing operations (PR #9663 by @hannesrudolph) +- Add new `search_and_replace` tool for batch text replacements across files (PR #9549 by @hannesrudolph) +- Add debug buttons to view API and UI history for troubleshooting (PR #9684 by @hannesrudolph) +- Include tool format in environment details for better context awareness (PR #9661 by @mrubens) +- Fix: Display install count in millions instead of thousands (PR #9677 by @app/roomote) +- Web-evals improvements: add task log viewing, export failed logs, and new run options (PR #9637 by @hannesrudolph) +- Web-evals updates: add kill run functionality (PR #9681 by @hannesrudolph) +- Fix: Prevent navigation buttons from wrapping on smaller screens (PR #9721 by @app/roomote) + +## [3.34.8] - 2025-11-27 + +![3.34.8 Release - Race Condition Fix](/releases/3.34.8-release.png) + +- Fix: Race condition in new_task tool for native protocol (PR #9655 by @daniel-lxs) + +## [3.34.7] - 2025-11-27 + +![3.34.7 Release - More Native Tool Integrations](/releases/3.34.7-release.png) + +- Support native tools in the Anthropic provider for improved tool calling (PR #9644 by @mrubens) +- Enable native tool calling for z.ai models (PR #9645 by @mrubens) +- Enable native tool calling for Moonshot models (PR #9646 by @mrubens) +- Fix: OpenRouter tool calls handling improvements (PR #9642 by @mrubens) +- Fix: OpenRouter GPT-5 strict schema validation for read_file tool (PR #9633 by @daniel-lxs) +- Fix: Create parent directories early in write_to_file to prevent ENOENT errors (#9634 by @ivanenev, PR #9640 by @daniel-lxs) +- Fix: Disable native tools and temperature support for claude-code provider (PR #9643 by @hannesrudolph) +- Add 'taking you to cloud' screen after provider welcome for improved onboarding (PR #9652 by @mrubens) + +## [3.34.6] - 2025-11-26 + +![3.34.6 Release - Bedrock Embeddings](/releases/3.34.6-release.png) + +- Add support for AWS Bedrock embeddings in code 
indexing (#8658 by @kyle-hobbs, PR #9475 by @ggoranov-smar) +- Add native tool calling support for Mistral provider (PR #9625 by @hannesrudolph) +- Wire MULTIPLE_NATIVE_TOOL_CALLS experiment to OpenAI parallel_tool_calls for parallel tool execution (PR #9621 by @hannesrudolph) +- Add fine grained tool streaming for OpenRouter Anthropic (PR #9629 by @mrubens) +- Allow global inference selection for Bedrock when cross-region is enabled (PR #9616 by @roomote) +- Fix: Filter non-Anthropic content blocks before sending to Vertex API (#9583 by @cardil, PR #9618 by @hannesrudolph) +- Fix: Restore content undefined check in WriteToFileTool.handlePartial() (#9611 by @Lissanro, PR #9614 by @daniel-lxs) +- Fix: Prevent model cache from persisting empty API responses (#9597 by @zx2021210538, PR #9623 by @daniel-lxs) +- Fix: Exclude access_mcp_resource tool when MCP has no resources (PR #9615 by @daniel-lxs) +- Fix: Update default settings for inline terminal and codebase indexing (PR #9622 by @roomote) +- Fix: Convert line_ranges strings to lineRanges objects in native tool calls (PR #9627 by @daniel-lxs) +- Fix: Defer new_task tool_result until subtask completes for native protocol (PR #9628 by @daniel-lxs) + +## [3.34.5] - 2025-11-25 + +![3.34.5 Release - Experimental Parallel Tool Calling](/releases/3.34.5-release.png) + +- Experimental feature to enable multiple native tool calls per turn (PR #9273 by @daniel-lxs) +- Add Bedrock Opus 4.5 to global inference model list (PR #9595 by @roomote) +- Fix: Update API handler when toolProtocol changes (PR #9599 by @mrubens) +- Set native tools as default for minimax-m2 and claude-haiku-4.5 (PR #9586 by @daniel-lxs) +- Make single file read only apply to XML tools (PR #9600 by @mrubens) +- Enhance web-evals dashboard with dynamic tool columns and UX improvements (PR #9592 by @hannesrudolph) +- Revert "Add support for Roo Code Cloud as an embeddings provider" while we fix some issues (PR #9602 by @mrubens) + +## [3.34.4] - 2025-11-25 
+ +![3.34.4 Release - BFL Image Generation](/releases/3.34.4-release.png) + +- Add new Black Forest Labs image generation models, free on Roo Code Cloud and also available on OpenRouter (PR #9587 and #9589 by @mrubens) +- Fix: Preserve dynamic MCP tool names in native mode API history to prevent tool name mismatches (PR #9559 by @daniel-lxs) +- Fix: Preserve tool_use blocks in summary message during condensing with native tools to maintain conversation context (PR #9582 by @daniel-lxs) + +## [3.34.3] - 2025-11-25 + +![3.34.3 Release - Streaming and Opus 4.5](/releases/3.34.3-release.png) + +- Implement streaming for native tool calls, providing real-time feedback during tool execution (PR #9542 by @daniel-lxs) +- Add Claude Opus 4.5 model to Claude Code provider (PR #9560 by @mrubens) +- Add Claude Opus 4.5 model to Bedrock provider (#9571 by @pisicode, PR #9572 by @roomote) +- Enable caching for Opus 4.5 model to improve performance (#9567 by @iainRedro, PR #9568 by @roomote) +- Add support for Roo Code Cloud as an embeddings provider (PR #9543 by @mrubens) +- Fix ask_followup_question streaming issue and add missing tool cases (PR #9561 by @daniel-lxs) +- Add contact links to About Roo Code settings page (PR #9570 by @roomote) +- Switch from asdf to mise-en-place in bare-metal evals setup script (PR #9548 by @cte) + +## [3.34.2] - 2025-11-24 + +![3.34.2 Release - Opus Conductor](/releases/3.34.2-release.png) + +- Add support for Claude Opus 4.5 in Anthropic and Vertex providers (PR #9541 by @daniel-lxs) +- Add support for Claude Opus 4.5 in OpenRouter with prompt caching and reasoning budget (PR #9540 by @daniel-lxs) +- Add Roo Code Cloud as an image generation provider (PR #9528 by @mrubens) +- Fix: Gracefully skip unsupported content blocks in Gemini transformer (PR #9537 by @daniel-lxs) +- Fix: Flush LiteLLM cache when credentials change on refresh (PR #9536 by @daniel-lxs) +- Fix: Ensure XML parser state matches tool protocol on config update (PR #9535 by 
@daniel-lxs) +- Update Cerebras models (PR #9527 by @sebastiand-cerebras) +- Fix: Support reasoning_details format for Gemini 3 models (PR #9506 by @daniel-lxs) + +## [3.34.1] - 2025-11-23 + +- Show the prompt for image generation in the UI (PR #9505 by @mrubens) +- Fix double todo list display issue (PR #9517 by @mrubens) +- Add tracking for cloud synced messages (PR #9518 by @mrubens) +- Enable the Roo Code Cloud provider in evals (PR #9492 by @cte) + +## [3.34.0] - 2025-11-21 + +![3.34.0 Release - Browser Use 2.0](/releases/3.34.0-release.png) + +- Add Browser Use 2.0 with enhanced browser interaction capabilities (PR #8941 by @hannesrudolph) +- Add support for Baseten as a new AI provider (PR #9461 by @AlexKer) +- Improve base OpenAI compatible provider with better error handling and configuration (PR #9462 by @mrubens) +- Add provider-oriented welcome screen to improve onboarding experience (PR #9484 by @mrubens) +- Pin Roo provider to the top of the provider list for better discoverability (PR #9485 by @mrubens) +- Enhance native tool descriptions with examples and clarifications for better AI understanding (PR #9486 by @daniel-lxs) +- Fix: Make cancel button immediately responsive during streaming (#9435 by @jwadow, PR #9448 by @daniel-lxs) +- Fix: Resolve apply_diff performance regression from earlier changes (PR #9474 by @daniel-lxs) +- Fix: Implement model cache refresh to prevent stale disk cache issues (PR #9478 by @daniel-lxs) +- Fix: Copy model-level capabilities to OpenRouter endpoint models correctly (PR #9483 by @daniel-lxs) +- Fix: Add fallback to yield tool calls regardless of finish_reason (PR #9476 by @daniel-lxs) + +## [3.33.3] - 2025-11-20 + +![3.33.3 Release - Gemini 3 Pro Image Preview](/releases/3.33.3-release.png) + +- Add Google Gemini 3 Pro Image Preview to image generation models (PR #9440 by @app/roomote) +- Add support for Minimax as Anthropic-compatible provider (PR #9455 by @daniel-lxs) +- Store reasoning in conversation history 
for all providers (PR #9451 by @daniel-lxs) +- Fix: Improve preserveReasoning flag to control API reasoning inclusion (PR #9453 by @daniel-lxs) +- Fix: Prevent OpenAI Native parallel tool calls for native tool calling (PR #9433 by @hannesrudolph) +- Fix: Improve search and replace symbol parsing (PR #9456 by @daniel-lxs) +- Fix: Send tool_result blocks for skipped tools in native protocol (PR #9457 by @daniel-lxs) +- Fix: Improve markdown formatting and add reasoning support (PR #9458 by @daniel-lxs) +- Fix: Prevent duplicate environment_details when resuming cancelled tasks (PR #9442 by @daniel-lxs) +- Improve read_file tool description with examples (PR #9422 by @daniel-lxs) +- Update glob dependency to ^11.1.0 (PR #9449 by @jr) +- Update tar-fs to 3.1.1 via pnpm override (PR #9450 by @app/roomote) + +## [3.33.2] - 2025-11-19 + +- Enable native tool calling for Gemini provider (PR #9343 by @hannesrudolph) +- Add RCC credit balance display (PR #9386 by @jr) +- Fix: Preserve user images in native tool call results (PR #9401 by @daniel-lxs) +- Perf: Reduce excessive getModel() calls and implement disk cache fallback (PR #9410 by @daniel-lxs) +- Show zero price for free models (PR #9419 by @mrubens) + +## [3.33.1] - 2025-11-18 + +![3.33.1 Release - Native Tool Protocol Fixes](/releases/3.33.1-release.png) + +- Add native tool calling support to OpenAI-compatible (PR #9369 by @mrubens) +- Fix: Resolve native tool protocol race condition causing 400 errors (PR #9363 by @daniel-lxs) +- Fix: Update tools to return structured JSON for native protocol (PR #9373 by @daniel-lxs) +- Fix: Include nativeArgs in tool repetition detection (PR #9377 by @daniel-lxs) +- Fix: Ensure no XML parsing when protocol is native (PR #9371 by @daniel-lxs) +- Fix: Gemini maxOutputTokens and reasoning config (PR #9375 by @hannesrudolph) +- Fix: Gemini thought signature validation and token counting errors (PR #9380 by @hannesrudolph) +- Fix: Exclude XML tool examples from MODES section when 
native protocol enabled (PR #9367 by @daniel-lxs) +- Retry eval tasks if API instability detected (PR #9365 by @cte) +- Add toolProtocol property to PostHog tool usage telemetry (PR #9374 by @app/roomote) + +## [3.33.0] - 2025-11-18 + +![3.33.0 Release - Twin Kangaroos and the Gemini Constellation](/releases/3.33.0-release.png) + +- Add Gemini 3 Pro Preview model (PR #9357 by @hannesrudolph) +- Improve Google Gemini defaults with better temperature and cost reporting (PR #9327 by @hannesrudolph) +- Enable native tool calling for openai-native provider (PR #9348 by @hannesrudolph) +- Add git status information to environment details (PR #9310 by @daniel-lxs) +- Add tool protocol selector to advanced settings (PR #9324 by @daniel-lxs) +- Implement dynamic tool protocol resolution with proper precedence hierarchy (PR #9286 by @daniel-lxs) +- Move Import/Export functionality to Modes view toolbar and cleanup Mode Edit view (PR #9077 by @hannesrudolph) +- Update cloud agent CTA to point to setup page (PR #9338 by @app/roomote) +- Fix: Prevent duplicate tool_result blocks in native tool protocol (PR #9248 by @daniel-lxs) +- Fix: Format tool responses properly for native protocol (PR #9270 by @daniel-lxs) +- Fix: Centralize toolProtocol configuration checks (PR #9279 by @daniel-lxs) +- Fix: Preserve tool blocks for native protocol in conversation history (PR #9319 by @daniel-lxs) +- Fix: Prevent infinite loop when task_done succeeds (PR #9325 by @daniel-lxs) +- Fix: Sync parser state with profile/model changes (PR #9355 by @daniel-lxs) +- Fix: Pass tool protocol parameter to lineCountTruncationError (PR #9358 by @daniel-lxs) +- Use VSCode theme color for outline button borders (PR #9336 by @app/roomote) +- Replace broken badgen.net badges with shields.io (PR #9318 by @app/roomote) +- Add max git status files setting to evals (PR #9322 by @mrubens) +- Roo Code Cloud Provider pricing page and changes elsewhere (PR #9195 by @brunobergher) + +## [3.32.1] - 2025-11-14 + 
+![3.32.1 Release - Bug Fixes](/releases/3.32.1-release.png) + +- Fix: Add abort controller for request cancellation in OpenAI native protocol (PR #9276 by @daniel-lxs) +- Fix: Resolve duplicate tool blocks causing 'tool has already been used' error in native protocol mode (PR #9275 by @daniel-lxs) +- Fix: Prevent duplicate tool_result blocks in native protocol mode for read_file (PR #9272 by @daniel-lxs) +- Fix: Correct OpenAI Native handling of encrypted reasoning blocks to prevent errors during condensing (PR #9263 by @hannesrudolph) +- Fix: Disable XML parser for native tool protocol to prevent parsing conflicts (PR #9277 by @daniel-lxs) + +## [3.32.0] - 2025-11-14 + +![3.32.0 Release - GPT-5.1 models and OpenAI prompt caching](/releases/3.32.0-release.png) + +- Feature: Add GPT-5.1 models to OpenAI provider (PR #9252 by @hannesrudolph) +- Feature: Support for OpenAI Responses 24 hour prompt caching (PR #9259 by @hannesrudolph) +- Fix: Repair the share button in the UI (PR #9253 by @hannesrudolph) +- Docs: Include PR numbers in the release guide to improve traceability (PR #9236 by @hannesrudolph) + +## [3.31.3] - 2025-11-13 + +![3.31.3 Release - Kangaroo Decrypting a Message](/releases/3.31.3-release.png) + +- Fix: OpenAI Native encrypted_content handling and remove gpt-5-chat-latest verbosity flag (#9225 by @politsin, PR by @hannesrudolph) +- Fix: Roo Code Cloud provider Anthropic input token normalization to avoid double-counting (thanks @hannesrudolph!) +- Refactor: Rename sliding-window to context-management and truncateConversationIfNeeded to manageContext (thanks @hannesrudolph!) + +## [3.31.2] - 2025-11-12 + +- Fix: Apply updated API profile settings when provider/model unchanged (#9208 by @hannesrudolph, PR by @hannesrudolph) +- Migrate conversation continuity to plugin-side encrypted reasoning items using Responses API for improved reliability (thanks @hannesrudolph!) 
+- Fix: Include mcpServers in getState() for auto-approval (#9190 by @bozoweed, PR by @daniel-lxs) +- Batch settings updates from the webview to the extension host for improved performance (thanks @cte!) +- Fix: Replace rate-limited badges with badgen.net to improve README reliability (thanks @daniel-lxs!) + +## [3.31.1] - 2025-11-11 + +![3.31.1 Release - Kangaroo Stuck in the Clouds](/releases/3.31.1-release.png) + +- Fix: Prevent command_output ask from blocking in cloud/headless environments (thanks @daniel-lxs!) +- Add IPC command for sending messages to the current task (thanks @mrubens!) +- Fix: Model switch re-applies selected profile, ensuring task configuration stays in sync (#9179 by @hannesrudolph, PR by @hannesrudolph) +- Move auto-approval logic from `ChatView` to `Task` for better architecture (thanks @cte!) +- Add custom Button component with variant system (thanks @brunobergher!) + +## [3.31.0] - 2025-11-07 + +![3.31.0 Release - Todo List and Task Header Improvements](/releases/3.31.0-release.png) + +- Improvements to to-do lists and task headers (thanks @brunobergher!) +- Fix: Prevent crash when streaming chunks have null choices array (thanks @daniel-lxs!) +- Fix: Prevent context condensing on settings save when provider/model unchanged (#4430 by @hannesrudolph, PR by @daniel-lxs) +- Fix: Respect custom OpenRouter URL for all API operations (#8947 by @sstraus, PR by @roomote) +- Add comprehensive error logging to Roo Cloud provider (thanks @daniel-lxs!) +- UX: Less caffeinated kangaroo (thanks @brunobergher!) + +## [3.30.3] - 2025-11-06 + +![3.30.3 Release - Moonshot Brain](/releases/3.30.3-release.png) + +- Feat: Add kimi-k2-thinking model to Moonshot provider (thanks @daniel-lxs!) 
+- Fix: Auto-retry on empty assistant response to prevent task failures (#9076 by @Akillatech, PR by @daniel-lxs) +- Fix: Use system role for OpenAI Compatible provider when streaming is disabled (#8215 by @whitfin, PR by @roomote) +- Fix: Prevent notification sound on attempt_completion with queued messages (#8537 by @hannesrudolph, PR by @roomote) +- Feat: Auto-switch to imported mode with architect fallback for better mode detection (#8239 by @hannesrudolph, PR by @daniel-lxs) +- Feat: Add MiniMax-M2-Stable model and enable prompt caching (#9070 by @nokaka, PR by @roomote) +- Feat: Improve diff appearance in main chat view (thanks @hannesrudolph!) +- UX: Home screen visuals (thanks @brunobergher!) +- Docs: Clarify that setting 0 disables Error & Repetition Limit (thanks @roomote!) +- Chore: Update dependency @changesets/cli to v2.29.7 (thanks @renovate!) + +## [3.30.2] - 2025-11-05 + +![3.30.2 Release - Eliminating UI Flicker](/releases/3.30.2-release.png) + +- Fix: eliminate UI flicker during task cancellation (thanks @daniel-lxs!) +- Add Global Inference support for Bedrock models (#8750 by @ronyblum, PR by @hannesrudolph) +- Add Qwen3 embedding models (0.6B and 4B) to OpenRouter support (#9058 by @dmarkey, PR by @app/roomote) +- Fix: resolve incorrect commit location when GIT_DIR set in Dev Containers (#4567 by @nonsleepr, PR by @heyseth) +- Fix: keep pinned models fixed at top of scrollable list (#8812 by @XiaoYingYo, PR by @app/roomote) +- Fix: update Opus 4.1 max tokens from 8K to 32K (#9045 by @kaveh-deriv, PR by @app/roomote) +- Set Claude Sonnet 4.5 as default for key providers (thanks @hannesrudolph!) 
+- Fix: dynamic provider model validation to prevent cross-contamination (#9047 by @NotADev137, PR by @daniel-lxs) +- Fix: Bedrock user agent to report full SDK details (#9031 by @ajjuaire, PR by @ajjuaire) +- Add file path tooltips with centralized PathTooltip component (#8278 by @da2ce7, PR by @daniel-lxs) +- Add conditional test running to pre-push hook (thanks @daniel-lxs!) +- Update Cerebras integration (thanks @sebastiand-cerebras!) + +## [3.30.1] - 2025-11-04 + +- Fix: Correct OpenRouter Mistral model embedding dimension from 3072 to 1536 (thanks @daniel-lxs!) +- Revert: Previous UI flicker fix that caused issues with task resumption (thanks @mrubens!) + +## [3.30.0] - 2025-11-03 + +![3.30.0 Release - PR Fixer](/releases/3.30.0-release.png) + +- Feat: Add OpenRouter embedding provider support (#8972 by @dmarkey, PR by @dmarkey) +- Feat: Add GLM-4.6 model to Fireworks provider (#8752 by @mmealman, PR by @app/roomote) +- Feat: Add MiniMax M2 model to Fireworks provider (#8961 by @dmarkey, PR by @app/roomote) +- Feat: Add preserveReasoning flag to include reasoning in API history (thanks @daniel-lxs!) +- Fix: Prevent message loss during queue drain race condition (#8536 by @hannesrudolph, PR by @daniel-lxs) +- Fix: Capture the reasoning content in base-openai-compatible for GLM 4.6 (thanks @mrubens!) +- Fix: Create new Requesty profile during OAuth (thanks @Thibault00!) +- Fix: Prevent UI flicker and enable resumption after task cancellation (thanks @daniel-lxs!) +- Fix: Cleanup terminal settings tab and change default terminal to inline (thanks @hannesrudolph!) + +## [3.29.5] - 2025-11-01 + +- Fix: Resolve Qdrant codebase_search error by adding keyword index for type field (#8963 by @rossdonald, PR by @app/roomote) +- Fix cost and token tracking between provider styles to ensure accurate usage metrics (thanks @mrubens!) + +## [3.29.4] - 2025-10-30 + +- Feat: Add Minimax Provider (thanks @Maosghoul!) 
+- Fix: prevent infinite loop when canceling during auto-retry (#8901 by @mini2s, PR by @app/roomote) +- Fix: Enhanced codebase index recovery and reuse ('Start Indexing' button now reuses existing Qdrant index) (#8129 by @jaroslaw-weber, PR by @heyseth) +- Fix: make code index initialization non-blocking at activation (#8777 by @cjlawson02, PR by @daniel-lxs) +- Fix: remove search_and_replace tool from codebase (#8891 by @hannesrudolph, PR by @app/roomote) +- Fix: custom modes under custom path not showing (#8122 by @hannesrudolph, PR by @elianiva) +- Fix: prevent MCP server restart when toggling tool permissions (#8231 by @hannesrudolph, PR by @heyseth) +- Fix: truncate type definition to match max read line (#8149 by @chenxluo, PR by @elianiva) +- Fix: auto-sync enableReasoningEffort with reasoning dropdown selection (thanks @daniel-lxs!) +- Fix: Gate auth-driven Roo model refresh to active provider only (thanks @daniel-lxs!) +- Prevent a noisy cloud agent exception (thanks @cte!) +- Feat: improve @ file search for large projects (#5721 by @Naituw, PR by @daniel-lxs) +- Feat: add zai-glm-4.6 model to Cerebras and set gpt-oss-120b as default (thanks @kevint-cerebras!) +- Feat: rename MCP Errors tab to Logs for mixed-level messages (#8893 by @hannesrudolph, PR by @app/roomote) +- docs(vscode-lm): clarify VS Code LM API integration warning (thanks @hannesrudolph!) + +## [3.29.3] - 2025-10-28 + +- Update Gemini models with latest 09-2025 versions including Gemini 2.5 Pro and Flash (#8485 by @cleacos, PR by @roomote) +- Add reasoning support for Z.ai GLM binary thinking mode (#8465 by @BeWater799, PR by @daniel-lxs) +- Enable reasoning in Roo provider (thanks @mrubens!) 
+- Add settings to configure time and cost display in system prompt (#8450 by @jaxnb, PR by @roomote) +- Fix: Use max_output_tokens when available in LiteLLM fetcher (#8454 by @fabb, PR by @roomote) +- Fix: Process queued messages after context condensing completes (#8477 by @JosXa, PR by @roomote) +- Fix: Use monotonic clock for rate limiting to prevent timing issues (#7770 by @intermarkec, PR by @chrarnoldus) +- Fix: Resolve checkpoint menu popover overflow (thanks @daniel-lxs!) +- Fix: LiteLLM test failures after merge (thanks @daniel-lxs!) +- Improve UX: Focus textbox and add newlines after adding to context (thanks @mrubens!) + +## [3.29.2] - 2025-10-27 + +- Add support for LongCat-Flash-Thinking-FP8 models in Chutes AI provider (#8425 by @leakless21, PR by @roomote) +- Fix: Remove specific Claude model version from settings descriptions to avoid outdated references (#8435 by @rwydaegh, PR by @roomote) +- Fix: Correct caching logic in Roo provider to improve performance (thanks @mrubens!) +- Fix: Ensure free models don't display pricing information in the UI (thanks @mrubens!) + +## [3.29.1] - 2025-10-26 + +![3.29.1 Release - Window Cleaning](/releases/3.29.1-release.png) + +- Fix: Clean up max output token calculations to prevent context window overruns (#8821 by @enerage, PR by @roomote) +- Fix: Change Add to Context keybinding to avoid Redo conflict (#8652 by @swythan, PR by @roomote) +- Fix provider model loading race conditions (thanks @mrubens!) + +## [3.29.0] - 2025-10-24 + +![3.29.0 Release - Intelligent File Reading](/releases/3.29.0-release.png) + +- Add token-budget based file reading with intelligent preview to avoid context overruns (thanks @daniel-lxs!) +- Enable browser-use tool for all image-capable models (#8116 by @hannesrudolph, PR by @app/roomote) +- Add dynamic model loading for Roo Code Cloud provider (thanks @app/roomote!) 
+- Fix: Respect nested .gitignore files in search_files (#7921 by @hannesrudolph, PR by @daniel-lxs) +- Fix: Preserve trailing newlines in stripLineNumbers for apply_diff (#8020 by @liyi3c, PR by @app/roomote) +- Fix: Exclude max tokens field for models that don't support it in export (#7944 by @hannesrudolph, PR by @elianiva) +- Retry API requests on stream failures instead of aborting task (thanks @daniel-lxs!) +- Improve auto-approve button responsiveness (thanks @daniel-lxs!) +- Add checkpoint initialization timeout settings and fix checkpoint timeout warnings (#7843 by @NaccOll, PR by @NaccOll) +- Always show checkpoint restore options regardless of change detection (thanks @daniel-lxs!) +- Improve checkpoint menu translations (thanks @daniel-lxs!) +- Add GLM-4.6-turbo model to chutes ai provider (thanks @mohammad154!) +- Add Claude Haiku 4.5 to prompt caching models (thanks @hannesrudolph!) +- Expand Z.ai model coverage with GLM-4.5-X, AirX, Flash (thanks @hannesrudolph!) +- Update Mistral Medium model name (#8362 by @ThomsenDrake, PR by @ThomsenDrake) +- Remove GPT-5 instructions/reasoning_summary from UI message metadata to prevent ui_messages.json bloat (thanks @hannesrudolph!) +- Normalize docs-extractor audience tags; remove admin/stakeholder; strip tool invocations (thanks @hannesrudolph!) +- Update X/Twitter username from roo_code to roocode (thanks @app/roomote!) +- Update Configuring Profiles video link (thanks @app/roomote!) +- Fix link text for Roomote Control in README (thanks @laz-001!) +- Remove verbose error for cloud agents (thanks @cte!) +- Try 5s status mutation timeout (thanks @cte!) + +## [3.28.18] - 2025-10-17 + +- Fix: Remove request content from UI messages to improve performance and reduce clutter (#5601 by @MuriloFP, #8594 by @multivac2x, #8690 by @hannesrudolph, PR by @mrubens) +- Fix: Prevent file editing issues when git diff views are open (thanks @hassoncs!) 
+- Fix: Add userAgent to Bedrock client for version tracking (#8660 by @ajjuaire, PR by @app/roomote) +- Feat: Z AI now uses only two coding endpoints for better performance (#8687 by @hannesrudolph) +- Feat: Update image generation model selection for improved quality (thanks @chrarnoldus!) + +## [3.28.17] - 2025-10-15 + +- Add support for Claude Haiku 4.5 model (thanks @daniel-lxs!) +- Fix: Update zh-TW run command title translation (thanks @PeterDaveHello!) + +## [3.28.16] - 2025-10-09 + +![3.28.16 Release - Expanded Context Window](/releases/3.28.16-release.png) + +- feat: Add Claude Sonnet 4.5 1M context window support for Claude Code (thanks @ColbySerpa!) +- feat: Identify cloud tasks in the extension bridge (thanks @cte!) +- fix: Add the parent task ID in telemetry (thanks @mrubens!) + +## [3.28.15] - 2025-10-03 + +![3.28.15 Release - Kangaroo Sliding Down a Chute](/releases/3.28.15-release.png) + +- Add new DeepSeek and GLM models with detailed descriptions to the Chutes provider (thanks @mohammad154!) +- Fix: properly reset cost limit tracking when user clicks "Reset and Continue" (#6889 by @alecoot, PR by app/roomote) +- Fix: improve save button activation in prompts settings (#5780 by @beccare, PR by app/roomote) +- Fix: overeager 'there are unsaved changes' dialog in settings (thanks @brunobergher!) +- Fix: show send button when only images are selected in chat textarea (thanks app/roomote!) +- Fix: Claude Sonnet 4.5 compatibility improvements (thanks @mrubens!) +- Add UsageStats schema and type for better analytics tracking (thanks app/roomote!) +- Include reasoning messages in cloud tasks (thanks @mrubens!) +- Security: update dependency vite to v6.3.6 (thanks app/renovate!) +- Deprecate free grok 4 fast model (thanks @mrubens!) +- Remove unsupported Gemini 2.5 Flash Image Preview free model (thanks @SannidhyaSah!) +- Add structured data to the homepage for better SEO (thanks @mrubens!) +- Update dependency glob to v11.0.3 (thanks app/renovate!) 
+ +## [3.28.14] - 2025-09-30 + +![3.28.14 Release - GLM-4.6 Model Support](/releases/3.28.14-release.png) + +- Add support for GLM-4.6 model for z.ai provider (#8406 by @dmarkey, PR by @roomote) + +## [3.28.13] - 2025-09-29 + +- Fix: Remove topP parameter from Bedrock inference config (#8377 by @ronyblum, PR by @daniel-lxs) +- Fix: Correct Vertex AI Sonnet 4.5 model configuration (#8387 by @nickcatal, PR by @mrubens) + +## [3.28.12] - 2025-09-29 + +- Fix: Correct Anthropic Sonnet 4.5 model ID and add Bedrock 1M context checkbox (thanks @daniel-lxs!) + +## [3.28.11] - 2025-09-29 + +- Fix: Correct Amazon Bedrock Claude Sonnet 4.5 model identifier (#8371 by @sunhyung, PR by @app/roomote) +- Fix: Correct Claude Sonnet 4.5 model ID format (thanks @daniel-lxs!) + +## [3.28.10] - 2025-09-29 + +![3.28.10 Release - Kangaroo Writing Sonnet 4.5](/releases/3.28.10-release.png) + +- Feat: Add Sonnet 4.5 support (thanks @daniel-lxs!) +- Fix: Resolve max_completion_tokens issue for GPT-5 models in LiteLLM provider (#6979 by @lx1054331851, PR by @roomote) +- Fix: Make chat icons properly sized with shrink-0 class (thanks @mrubens!) +- Enhancement: Track telemetry settings changes for better analytics (thanks @mrubens!) +- Web: Add testimonials section to website (thanks @brunobergher!) +- CI: Refresh contrib.rocks cache workflow for contributor badges (thanks @hannesrudolph!) + +## [3.28.9] - 2025-09-26 + +![3.28.9 Release - Supernova Upgrade](/releases/3.28.9-release.png) + +- The free Supernova model now has a 1M token context window (thanks @mrubens!) +- Experiment to show the Roo provider on the welcome screen (thanks @mrubens!) +- Web: Website improvements to https://roocode.com/ (thanks @brunobergher!) +- Fix: Remove tags from prompts for cleaner output and fewer tokens (#8318 by @hannesrudolph, PR by @app/roomote) +- Correct tool use suggestion to improve model adherence to suggestion (thanks @hannesrudolph!) 
+- feat: log out from cloud when resetting extension state (thanks @app/roomote!) +- feat: Add telemetry tracking to DismissibleUpsell component (thanks @app/roomote!) +- refactor: remove pr-reviewer mode (thanks @daniel-lxs!) +- Removing user hint when refreshing models (thanks @requesty-JohnCosta27!) + +## [3.28.8] - 2025-09-25 + +![3.28.8 Release - Bug fixes and improvements](/releases/3.28.8-release.png) + +- Fix: Resolve frequent "No tool used" errors by clarifying tool-use rules (thanks @hannesrudolph!) +- Fix: Include initial ask in condense summarization (thanks @hannesrudolph!) +- Add support for more free models in the Roo provider (thanks @mrubens!) +- Show cloud switcher and option to add a team when logged in (thanks @mrubens!) +- Add Opengraph image for web (thanks @brunobergher!) + +## [3.28.7] - 2025-09-23 + +![3.28.7 Release - Hidden Thinking](/releases/3.28.7-release.png) + +- UX: Collapse thinking blocks by default with UI settings to always show them (thanks @brunobergher!) +- Fix: Resolve checkpoint restore popover positioning issue (#8219 by @NaccOll, PR by @app/roomote) +- Add cloud account switcher functionality (thanks @mrubens!) +- Add support for zai-org/GLM-4.5-turbo model in Chutes provider (#8155 by @mugnimaestra, PR by @app/roomote) + +## [3.28.6] - 2025-09-23 + +![3.28.6 Release - Kangaroo studying ancient codex](/releases/3.28.6-release.png) + +- Feat: Add GPT-5-Codex model (thanks @daniel-lxs!) +- Feat: Add keyboard shortcut for toggling auto-approve (Cmd/Ctrl+Alt+A) (thanks @brunobergher!) +- Fix: Improve reasoning block formatting for better readability (thanks @daniel-lxs!) +- Fix: Respect Ollama Modelfile num_ctx configuration (#7797 by @hannesrudolph, PR by @app/roomote) +- Fix: Prevent checkpoint text from wrapping in non-English languages (#8206 by @NaccOll, PR by @app/roomote) +- Remove language selection and word wrap toggle from CodeBlock (thanks @mrubens!) 
+- Feat: Add package.nls.json checking to find-missing-translations script (thanks @app/roomote!) +- Fix: Bare metal evals fixes (thanks @cte!) +- Fix: Follow-up questions should trigger the "interactive" state (thanks @cte!) + +## [3.28.5] - 2025-09-20 + +![3.28.5 Release - Kangaroo staying hydrated](/releases/3.28.5-release.png) + +- Fix: Resolve duplicate rehydrate during reasoning; centralize rehydrate and preserve cancel metadata (#8153 by @hannesrudolph, PR by @hannesrudolph) +- Add an announcement for Supernova (thanks @mrubens!) +- Wrap code blocks by default for improved readability (thanks @mrubens!) +- Fix: Support dash prefix in parseMarkdownChecklist for todo lists (#8054 by @NaccOll, PR by app/roomote) +- Fix: Apply tiered pricing for Gemini models via Vertex AI (#8017 by @ikumi3, PR by app/roomote) +- Update SambaNova models to latest versions (thanks @snova-jorgep!) +- Update privacy policy to allow occasional emails (thanks @jdilla1277!) + +## [3.28.4] - 2025-09-19 + +![3.28.4 Release - Supernova Discovery](/releases/3.28.4-release.png) + +- UX: Redesigned Message Feed (thanks @brunobergher!) +- UX: Responsive Auto-Approve (thanks @brunobergher!) +- Add telemetry retry queue for network resilience (thanks @daniel-lxs!) +- Fix: Transform keybindings in nightly build to fix command+y shortcut (thanks @app/roomote!) +- New code-supernova stealth model in the Roo Code Cloud provider (thanks @mrubens!) 
+ +## [3.28.3] - 2025-09-16 + +![3.28.3 Release - UI/UX Improvements and Bug Fixes](/releases/3.28.3-release.png) + +- Fix: Filter out Claude Code built-in tools (ExitPlanMode, BashOutput, KillBash) (#7817 by @juliettefournier-econ, PR by @roomote) +- Replace + icon with edit icon for New Task button (#7941 by @hannesrudolph, PR by @roomote) +- Fix: Corrected C# tree-sitter query (#5238 by @vadash, PR by @mubeen-zulfiqar) +- Add keyboard shortcut for "Add to Context" action (#7907 by @hannesrudolph, PR by @roomote) +- Fix: Context menu is obscured when editing a message (#7759 by @mini2s, PR by @NaccOll) +- Fix: Handle ByteString conversion errors in OpenAI embedders (#7959 by @PavelA85, PR by @daniel-lxs) +- Add Z.ai coding plan support (thanks @daniel-lxs!) +- Move slash commands to Settings tab with gear icon for discoverability (thanks @roomote!) +- Reposition Add Image button inside ChatTextArea (thanks @roomote!) +- Bring back a way to temporarily and globally pause auto-approve without losing your toggle state (thanks @brunobergher!) +- Make text area buttons appear only when there's text (thanks @brunobergher!) +- CONTRIBUTING.md tweaks and issue template rewrite (thanks @hannesrudolph!) +- Bump axios from 1.9.0 to 1.12.0 (thanks @dependabot!) + +## [3.28.2] - 2025-09-14 + +![3.28.2 Release - Auto-approve improvements](/releases/3.28.2-release.png) + +- Improve auto-approve UI with smaller and more subtle design (thanks @brunobergher!) 
+- Fix: Message queue re-queue loop in Task.ask() causing performance issues (#7861 by @hannesrudolph, PR by @daniel-lxs) +- Fix: Restrict @-mention parsing to line-start or whitespace boundaries to prevent false triggers (#7875 by @hannesrudolph, PR by @app/roomote) +- Fix: Make nested git repository warning persistent with path info for better visibility (#7884 by @hannesrudolph, PR by @app/roomote) +- Fix: Include API key in Ollama /api/tags requests for authenticated instances (#7902 by @ItsOnlyBinary, PR by @app/roomote) +- Fix: Preserve original first message context during conversation condensing (thanks @daniel-lxs!) +- Add Qwen3 Next 80B A3B models to chutes provider (thanks @daniel-lxs!) +- Disable Roomote Control on logout for better security (thanks @cte!) +- Add padding to the cloudview for better visual spacing (thanks @mrubens!) + +## [3.28.1] - 2025-09-11 + +![3.28.1 Release - Kangaroo riding rocket to the clouds](/releases/3.28.1-release.png) + +- Announce Roo Code Cloud! +- Add cloud task button for opening tasks in Roo Code Cloud (thanks @app/roomote!) +- Make Posthog telemetry the default (thanks @mrubens!) +- Show notification when the checkpoint initialization fails (thanks @app/roomote!) +- Bust cache in generated image preview (thanks @mrubens!) +- Fix: Center active mode in selector dropdown on open (#7882 by @hannesrudolph, PR by @app/roomote) +- Fix: Preserve first message during conversation condensing (thanks @daniel-lxs!) + +## [3.28.0] - 2025-09-10 + +![3.28.0 Release - Continue tasks in Roo Code Cloud](/releases/3.28.0-release.png) + +- feat: Continue tasks in Roo Code Cloud (thanks @brunobergher!) +- feat: Support connecting to Cloud without redirect handling (thanks @mrubens!) +- feat: Add toggle to control task syncing to Cloud (thanks @jr!) 
+- feat: Add click-to-edit, ESC-to-cancel, and fix padding consistency for chat messages (#7788 by @hannesrudolph, PR by @app/roomote) +- feat: Make reasoning more visible (thanks @app/roomote!) +- fix: Fix Groq context window display (thanks @mrubens!) +- fix: Add GIT_EDITOR env var to merge-resolver mode for non-interactive rebase (thanks @daniel-lxs!) +- fix: Resolve chat message edit/delete duplication issues (thanks @daniel-lxs!) +- fix: Reduce CodeBlock button z-index to prevent overlap with popovers (#7703 by @A0nameless0man, PR by @daniel-lxs) +- fix: Revert PR #7188 - Restore temperature parameter to fix TabbyApi/ExLlamaV2 crashes (#7581 by @drknyt, PR by @daniel-lxs) +- fix: Make ollama models info transport work like lmstudio (#7674 by @ItsOnlyBinary, PR by @ItsOnlyBinary) +- fix: Update DeepSeek pricing to new unified rates effective Sept 5, 2025 (#7685 by @NaccOll, PR by @app/roomote) +- feat: Update Vertex AI models and regions (#7725 by @ssweens, PR by @ssweens) +- chore: Update dependency eslint-plugin-turbo to v2.5.6 (thanks @app/renovate!) +- chore: Update dependency @changesets/cli to v2.29.6 (thanks @app/renovate!) +- chore: Update dependency nock to v14.0.10 (thanks @app/renovate!) +- chore: Update dependency eslint-config-prettier to v10.1.8 (thanks @app/renovate!) +- chore: Update dependency esbuild to v0.25.9 (thanks @app/renovate!) + +## [3.27.0] - 2025-09-05 + +![3.27.0 Release - Bug Fixes and Improvements](/releases/3.27.0-release.png) + +- Add: User message editing and deletion functionality (thanks @NaccOll!) 
+- Add: Kimi K2-0905 model support in Chutes provider (#7700 by @pwilkin, PR by @app/roomote) +- Fix: Prevent stack overflow in codebase indexing for large projects (#7588 by @StarTrai1, PR by @daniel-lxs) +- Fix: Resolve race condition in Gemini Grounding Sources by improving code design (#6372 by @daniel-lxs, PR by @HahaBill) +- Fix: Preserve conversation context by retrying with full conversation on invalid previous_response_id (thanks @daniel-lxs!) +- Fix: Identify MCP and slash command config path in multiple folder workspaces (#6720 by @kfuglsang, PR by @NaccOll) +- Fix: Handle array paths from VSCode terminal profiles correctly (#7695 by @Amosvcc, PR by @app/roomote) +- Fix: Improve WelcomeView styling and readability (thanks @daniel-lxs!) +- Fix: Resolve CI e2e test ETIMEDOUT errors when downloading VS Code (thanks @daniel-lxs!) + +## [3.26.7] - 2025-09-04 + +![3.26.7 Release - OpenAI Service Tiers](/releases/3.26.7-release.png) + +- Feature: Add OpenAI Responses API service tiers (flex/priority) with UI selector and pricing (thanks @hannesrudolph!) +- Feature: Add DeepInfra as a model provider in Roo Code (#7661 by @Thachnh, PR by @Thachnh) +- Feature: Update kimi-k2-0905-preview and kimi-k2-turbo-preview models on the Moonshot provider (thanks @CellenLee!) +- Feature: Add kimi-k2-0905-preview to Groq, Moonshot, and Fireworks (thanks @daniel-lxs and Cline!) +- Fix: Prevent countdown timer from showing in history for answered follow-up questions (#7624 by @XuyiK, PR by @daniel-lxs) +- Fix: Moonshot's maximum return token count limited to 1024 issue resolved (#6936 by @greyishsong, PR by @wangxiaolong100) +- Fix: Add error transform to cryptic OpenAI SDK errors when API key is invalid (#7483 by @A0nameless0man, PR by @app/roomote) +- Fix: Validate MCP tool exists before execution (#7631 by @R-omk, PR by @app/roomote) +- Fix: Handle zsh glob qualifiers correctly (thanks @mrubens!) +- Fix: Handle zsh process substitution correctly (thanks @mrubens!) 
+- Fix: Minor zh-TW Traditional Chinese locale typo fix (thanks @PeterDaveHello!) + +## [3.26.6] - 2025-09-03 + +![3.26.6 Release - Bug Fixes and Tool Improvements](/releases/3.26.6-release.png) + +- Add experimental run_slash_command tool to let the model initiate slash commands (thanks @app/roomote!) +- Fix: use askApproval wrapper in insert_content and search_and_replace tools (#7648 by @hannesrudolph, PR by @app/roomote) +- Add Kimi K2 Turbo model configuration to moonshotModels (thanks @wangxiaolong100!) +- Fix: preserve scroll position when switching tabs in settings (thanks @DC-Dancao!) + +## [3.26.5] - 2025-09-03 + +![3.26.5 Release - Enhanced AI Thinking Capabilities](/releases/3.26.5-release.png) + +- feat: Add support for Qwen3 235B A22B Thinking 2507 model in chutes (thanks @mohammad154!) +- feat: Add auto-approve support for MCP access_resource tool (#7565 by @m-ibm, PR by @daniel-lxs) +- feat: Add configurable embedding batch size for code indexing (#7356 by @BenLampson, PR by @app/roomote) +- fix: Add cache reporting support for OpenAI-Native provider (thanks @hannesrudolph!) +- feat: Move message queue to the extension host for better performance (thanks @cte!) + +## [3.26.4] - 2025-09-01 + +![3.26.4 Release - Memory Optimization](/releases/3.26.4-release.png) + +- Optimize memory usage for image handling in webview (thanks @daniel-lxs!) +- Fix: Special tokens should not break task processing (#7539 by @pwilkin, PR by @pwilkin) +- Add Ollama API key support for Turbo mode (#7147 by @LivioGama, PR by @app/roomote) +- Rename Account tab to Cloud tab for clarity (thanks @app/roomote!) +- Add kangaroo-themed release image generation (thanks @mrubens!) + +## [3.26.3] - 2025-08-29 + +![3.26.3 Release - Kangaroo Photo Editor](/releases/3.26.3-release.png) + +- Add optional input image parameter to image generation tool (thanks @roomote!) +- Refactor: Flatten image generation settings structure (thanks @daniel-lxs!) 
+- Show console logging in vitests when the --no-silent flag is set (thanks @hassoncs!) + +## [3.26.2] - 2025-08-28 + +![3.26.2 Release - Kangaroo Digital Artist](/releases/3.26.2-release.png) + +- feat: Add experimental image generation tool with OpenRouter integration (thanks @daniel-lxs!) +- Fix: Resolve GPT-5 Responses API issues with condensing and image support (#7334 by @nlbuescher, PR by @daniel-lxs) +- Fix: Hide .rooignore'd files from environment details by default (#7368 by @AlexBlack772, PR by @app/roomote) +- Fix: Exclude browser scroll actions from repetition detection (#7470 by @cgrierson-smartsheet, PR by @app/roomote) + +## [3.26.1] - 2025-08-27 + +![3.26.1 Release - Kangaroo Network Engineer](/releases/3.26.1-release.png) + +- Add Vercel AI Gateway provider integration (thanks @joshualipman123!) +- Add support for Vercel embeddings (thanks @mrubens!) +- Enable on-disk storage for Qdrant vectors and HNSW index (thanks @daniel-lxs!) +- Show model ID in API configuration dropdown (thanks @daniel-lxs!) +- Update tooltip component to match native VSCode tooltip shadow styling (thanks @roomote!) +- Fix: remove duplicate cache display in task header (thanks @mrubens!) +- Random chat text area cleanup (thanks @cte!) + +## [3.26.0] - 2025-08-26 + +![3.26.0 Release - Kangaroo Speed Racer](/releases/3.26.0-release.png) + +- Sonic -> Grok Code Fast +- feat: Add Qwen Code CLI API Support with OAuth Authentication (thanks @evinelias and Cline!) +- feat: Add Deepseek v3.1 to Fireworks AI provider (#7374 by @dmarkey, PR by @app/roomote) +- Add a built-in /init slash command (thanks @mrubens and @hannesrudolph!) 
+- Fix: Make auto approve toggle trigger stay (#3909 by @kyle-apex, PR by @elianiva) +- Fix: Preserve user input when selecting follow-up choices (#7316 by @teihome, PR by @daniel-lxs) +- Fix: Handle Mistral thinking content as reasoning chunks (#6842 by @Biotrioo, PR by @app/roomote) +- Fix: Resolve newTaskRequireTodos setting not working correctly (thanks @hannesrudolph!) +- Fix: Requesty model listing (#7377 by @dtrugman, PR by @dtrugman) +- feat: Hide static providers with no models from provider list (thanks @daniel-lxs!) +- Add todos parameter to new_task tool usage in issue-fixer mode (thanks @hannesrudolph!) +- Handle substitution patterns in command validation (thanks @mrubens!) +- Mark code-workspace files as protected (thanks @mrubens!) +- Update list of default allowed commands (thanks @mrubens!) +- Follow symlinks in rooignore checks (thanks @mrubens!) +- Show cache read and write prices for OpenRouter inference providers (thanks @chrarnoldus!) +- chore(deps): Update dependency drizzle-kit to v0.31.4 (thanks @app/renovate!) + +## [3.25.23] - 2025-08-22 + +- feat: add custom base URL support for Requesty provider (thanks @requesty-JohnCosta27!) +- feat: add DeepSeek V3.1 model to Chutes AI provider (#7294 by @dmarkey, PR by @app/roomote) +- Revert "feat: enable loading Roo modes from multiple files in .roo/modes directory" temporarily to fix a bug with mode installation + +## [3.25.22] - 2025-08-22 + +- Add prompt caching support for Kimi K2 on Groq (thanks @daniel-lxs and @benank!) +- Add documentation links for global custom instructions in UI (thanks @app/roomote!) 
+ +## [3.25.21] - 2025-08-21 + +- Ensure subtask results are provided to GPT-5 in OpenAI Responses API +- Promote the experimental AssistantMessageParser to the default parser +- Update DeepSeek models context window to 128k (thanks @JuanPerezReal) +- Enable grounding features for Vertex AI (thanks @anguslees) +- Allow orchestrator to pass TODO lists to subtasks +- Improved MDM handling +- Handle nullish token values in ContextCondenseRow to prevent UI crash (thanks @s97712) +- Improved context window error handling for OpenAI and other providers +- Add "installed" filter to Roo Marketplace (thanks @semidark) +- Improve filesystem access checks (thanks @elianiva) +- Support for loading Roo modes from multiple YAML files in the `.roo/modes/` directory (thanks @farazoman) +- Add Featherless provider (thanks @DarinVerheijke) + +## [3.25.20] - 2025-08-19 + +- Add announcement for Sonic model + +## [3.25.19] - 2025-08-19 + +- Fix issue where new users couldn't select the Roo Code Cloud provider (thanks @daniel-lxs!) + +## [3.25.18] - 2025-08-19 + +- Add new stealth Sonic model through the Roo Code Cloud provider +- Fix: respect enableReasoningEffort setting when determining reasoning usage (#7048 by @ikbencasdoei, PR by @app/roomote) +- Fix: prevent duplicate LM Studio models with case-insensitive deduplication (#6954 by @fbuechler, PR by @daniel-lxs) +- Feat: simplify ask_followup_question prompt documentation (thanks @daniel-lxs!) +- Feat: simple read_file tool for single-file-only models (thanks @daniel-lxs!) +- Fix: Add missing zaiApiKey and doubaoApiKey to SECRET_STATE_KEYS (#7082 by @app/roomote) +- Feat: Add new models and update configurations for vscode-lm (thanks @NaccOll!) 
+ +## [3.25.17] - 2025-08-17 + +- Fix: Resolve terminal reuse logic issues + +## [3.25.16] - 2025-08-16 + +- Add support for OpenAI gpt-5-chat-latest model (#7057 by @PeterDaveHello, PR by @app/roomote) +- Fix: Use native Ollama API instead of OpenAI compatibility layer (#7070 by @LivioGama, PR by @daniel-lxs) +- Fix: Prevent XML entity decoding in diff tools (#7107 by @indiesewell, PR by @app/roomote) +- Fix: Add type check before calling .match() on diffItem.content (#6905 by @pwilkin, PR by @app/roomote) +- Refactor task execution system: improve call stack management (thanks @catrielmuller!) +- Fix: Enable save button for provider dropdown and checkbox changes (thanks @daniel-lxs!) +- Add an API for resuming tasks by ID (thanks @mrubens!) +- Emit event when a task ask requires interaction (thanks @cte!) +- Make enhance with task history default to true (thanks @liwilliam2021!) +- Fix: Use cline.cwd as primary source for workspace path in codebaseSearchTool (thanks @NaccOll!) +- Hotfix multiple folder workspace checkpoint (thanks @NaccOll!) + +## [3.25.15] - 2025-08-14 + +- Fix: Remove 500-message limit to prevent scrollbar jumping in long conversations (#7052, #7063 by @daniel-lxs, PR by @app/roomote) +- Fix: Reset condensing state when switching tasks (#6919 by @f14XuanLv, PR by @f14XuanLv) +- Fix: Implement sitemap generation in TypeScript and remove XML file (#5231 by @abumalick, PR by @abumalick) +- Fix: allowedMaxRequests and allowedMaxCost values not showing in the settings UI (thanks @chrarnoldus!) + +## [3.25.14] - 2025-08-13 + +- Fix: Only include verbosity parameter for models that support it (#7054 by @eastonmeth, PR by @app/roomote) +- Fix: Amazon Bedrock 1M context - Move anthropic_beta to additionalModelRequestFields (thanks @daniel-lxs!) 
+- Fix: Make cancelling requests more responsive by reverting recent changes + +## [3.25.13] - 2025-08-12 + +- Add Sonnet 1M context checkbox to Bedrock +- Fix: add --no-messages flag to ripgrep to suppress file access errors (#6756 by @R-omk, PR by @app/roomote) +- Add support for AGENT.md alongside AGENTS.md (#6912 by @Brendan-Z, PR by @app/roomote) +- Remove deprecated GPT-4.5 Preview model (thanks @PeterDaveHello!) + +## [3.25.12] - 2025-08-12 + +- Update: Claude Sonnet 4 context window configurable to 1 million tokens in Anthropic provider (thanks @daniel-lxs!) +- Add: Minimal reasoning support to OpenRouter (thanks @daniel-lxs!) +- Fix: Add configurable API request timeout for local providers (#6521 by @dabockster, PR by @app/roomote) +- Fix: Add --no-sandbox flag to browser launch options (#6632 by @QuinsZouls, PR by @QuinsZouls) +- Fix: Ensure JSON files respect .rooignore during indexing (#6690 by @evermoving, PR by @app/roomote) +- Add: New Chutes provider models (#6698 by @fstandhartinger, PR by @app/roomote) +- Add: OpenAI gpt-oss models to Amazon Bedrock dropdown (#6752 by @josh-clanton-powerschool, PR by @app/roomote) +- Fix: Correct tool repetition detector to not block first tool call when limit is 1 (#6834 by @NaccOll, PR by @app/roomote) +- Fix: Improve checkpoint service initialization handling (thanks @NaccOll!) +- Update: Improve zh-TW Traditional Chinese locale (thanks @PeterDaveHello!) +- Add: Task expand and collapse translations (thanks @app/roomote!) +- Update: Exclude GPT-5 models from 20% context window output token cap (thanks @app/roomote!) +- Fix: Truncate long model names in model selector to prevent overflow (thanks @app/roomote!) +- Add: Requesty base url support (thanks @requesty-JohnCosta27!) + +## [3.25.11] - 2025-08-11 + +- Add: Native OpenAI provider support for Codex Mini model (#5386 by @KJ7LNW, PR by @daniel-lxs) +- Add: IO Intelligence Provider support (thanks @ertan2002!) 
+- Fix: MCP startup issues and remove refresh notifications (thanks @hannesrudolph!) +- Fix: Improvements to GPT-5 OpenAI provider configuration (thanks @hannesrudolph!) +- Fix: Clarify codebase_search path parameter as optional and improve tool descriptions (thanks @app/roomote!) +- Fix: Bedrock provider workaround for LiteLLM passthrough issues (thanks @jr!) +- Fix: Token usage and cost being underreported on cancelled requests (thanks @chrarnoldus!) + +## [3.25.10] - 2025-08-07 + +- Add support for GPT-5 (thanks Cline and @app/roomote!) +- Fix: Use CDATA sections in XML examples to prevent parser errors (#4852 by @hannesrudolph, PR by @hannesrudolph) +- Fix: Add missing MCP error translation keys (thanks @app/roomote!) + +## [3.25.9] - 2025-08-07 + +- Fix: Resolve rounding issue with max tokens (#6806 by @markp018, PR by @mrubens) +- Add support for GLM-4.5 and OpenAI gpt-oss models in Fireworks provider (#6753 by @alexfarlander, PR by @app/roomote) +- Improve UX by focusing chat input when clicking plus button in extension menu (thanks @app/roomote!) + +## [3.25.8] - 2025-08-06 + +- Fix: Prevent disabled MCP servers from starting processes and show correct status (#6036 by @hannesrudolph, PR by @app/roomote) +- Fix: Handle current directory path "." correctly in codebase_search tool (#6514 by @hannesrudolph, PR by @app/roomote) +- Fix: Trim whitespace from OpenAI base URL to fix model detection (#6559 by @vauhochzett, PR by @app/roomote) +- Feat: Reduce Gemini 2.5 Pro minimum thinking budget to 128 (thanks @app/roomote!) +- Fix: Improve handling of net::ERR_ABORTED errors in URL fetching (#6632 by @QuinsZouls, PR by @app/roomote) +- Fix: Recover from error state when Qdrant becomes available (#6660 by @hannesrudolph, PR by @app/roomote) +- Fix: Resolve memory leak in ChatView virtual scrolling implementation (thanks @xyOz-dev!) 
+- Add: Swift files to fallback list (#5857 by @niteshbalusu11, #6555 by @sealad886, PR by @niteshbalusu11) +- Feat: Clamp default model max tokens to 20% of context window (thanks @mrubens!) + +## [3.25.7] - 2025-08-05 + +- Add support for Claude Opus 4.1 +- Add Fireworks AI provider (#6653 by @ershang-fireworks, PR by @ershang-fireworks) +- Add Z AI provider (thanks @jues!) +- Add Groq support for GPT-OSS +- Add Cerebras support for GPT-OSS +- Add code indexing support for multiple folders similar to task history (#6197 by @NaccOll, PR by @NaccOll) +- Make mode selection dropdowns responsive (#6423 by @AyazKaan, PR by @AyazKaan) +- Redesigned task header and task history (thanks @brunobergher!) +- Fix checkpoints timing and ensure checkpoints work properly (#4827 by @mrubens, PR by @NaccOll) +- Prevent empty mode names from being saved (#5766 by @kfxmvp, PR by @app/roomote) +- Fix MCP server creation when setting is disabled (#6607 by @characharm, PR by @app/roomote) +- Update highlight layer style and align to textarea (#6647 by @NaccOll, PR by @NaccOll) +- Fix UI for approving chained commands +- Use assistantMessageParser class instead of parseAssistantMessage (#5340 by @qdaxb, PR by @qdaxb) +- Conditionally include reminder section based on todo list config (thanks @NaccOll!) +- Task and TaskProvider event emitter cleanup with new events (thanks @cte!) + ## [3.25.6] - 2025-08-01 - Set horizon-beta model max tokens to 32k for OpenRouter (requested by @hannesrudolph, PR by @app/roomote) @@ -285,7 +1456,7 @@ - Add user-configurable search score threshold slider for semantic search (thanks @hannesrudolph!) - Add default headers and testing for litellm fetcher (thanks @andrewshu2000!) - Fix consistent cancellation error messages for thinking vs streaming phases -- Fix AWS Bedrock cross-region inference profile mapping (thanks @KevinZhao!) +- Fix Amazon Bedrock cross-region inference profile mapping (thanks @KevinZhao!) 
- Fix URL loading timeout issues in @ mentions (thanks @MuriloFP!) - Fix API retry exponential backoff capped at 10 minutes (thanks @MuriloFP!) - Fix Qdrant URL field auto-filling with default value (thanks @SannidhyaSah!) @@ -299,7 +1470,7 @@ - Suppress Mermaid error rendering - Improve Mermaid buttons with light background in light mode (thanks @chrarnoldus!) - Add .vscode/ to write-protected files/directories -- Update AWS Bedrock cross-region inference profile mapping (thanks @KevinZhao!) +- Update Amazon Bedrock cross-region inference profile mapping (thanks @KevinZhao!) ## [3.22.5] - 2025-06-28 @@ -923,7 +2094,7 @@ - Improved display of diff errors + easy copying for investigation - Fixes to .vscodeignore (thanks @franekp!) - Fix a zh-CN translation for model capabilities (thanks @zhangtony239!) -- Rename AWS Bedrock to Amazon Bedrock (thanks @ronyblum!) +- Rename AWS Bedrock to Amazon Bedrock (thanks @ronyblum!) - Update extension title and description (thanks @StevenTCramer!) ## [3.11.12] - 2025-04-09 @@ -1172,12 +2343,12 @@ - PowerShell-specific command handling (thanks @KJ7LNW!) - OpenAI-compatible DeepSeek/QwQ reasoning support (thanks @lightrabbit!) - Anthropic-style prompt caching in the OpenAI-compatible provider (thanks @dleen!) -- Add Deepseek R1 for AWS Bedrock (thanks @ATempsch!) +- Add Deepseek R1 for Amazon Bedrock (thanks @ATempsch!) - Fix MarkdownBlock text color for Dark High Contrast theme (thanks @cannuri!) - Add gemini-2.0-pro-exp-02-05 model to vertex (thanks @shohei-ihaya!) - Bring back progress status for multi-diff edits (thanks @qdaxb!) - Refactor alert dialog styles to use the correct vscode theme (thanks @cannuri!) -- Custom ARNs in AWS Bedrock (thanks @Smartsheet-JB-Brown!) +- Custom ARNs in Amazon Bedrock (thanks @Smartsheet-JB-Brown!) - Update MCP servers directory path for platform compatibility (thanks @hannesrudolph!) - Fix browser system prompt inclusion rules (thanks @cannuri!) 
- Publish git tags to github from CI (thanks @pdecat!) @@ -1315,7 +2486,7 @@ ## [3.7.1] - 2025-02-24 -- Add AWS Bedrock support for Sonnet 3.7 and update some defaults to Sonnet 3.7 instead of 3.5 +- Add Amazon Bedrock support for Sonnet 3.7 and update some defaults to Sonnet 3.7 instead of 3.5 ## [3.7.0] - 2025-02-24 @@ -1332,7 +2503,7 @@ ## [3.3.24] - 2025-02-20 -- Fixed a bug with region selection preventing AWS Bedrock profiles from being saved (thanks @oprstchn!) +- Fixed a bug with region selection preventing Amazon Bedrock profiles from being saved (thanks @oprstchn!) - Updated the price of gpt-4o (thanks @marvijo-code!) ## [3.3.23] - 2025-02-20 @@ -1516,7 +2687,7 @@ - Reverts provider key entry back to checking onInput instead of onChange to hopefully address issues entering API keys (thanks @samhvw8!) - Added explicit checkbox to use Azure for OpenAI compatible providers (thanks @samhvw8!) - Fixed Glama usage reporting (thanks @punkpeye!) -- Added Llama 3.3 70B Instruct model to the AWS Bedrock provider options (thanks @Premshay!) +- Added Llama 3.3 70B Instruct model to the Amazon Bedrock provider options (thanks @Premshay!) ## [3.2.7] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c23c424f414..869b59a16da 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,25 +67,27 @@ Mention alignment with these areas in your PRs. ### Issue-First Approach -All contributions must begin with a GitHub Issue. +All contributions start with a GitHub Issue using our skinny templates. - **Check existing issues**: Search [GitHub Issues](https://github.com/RooCodeInc/Roo-Code/issues). -- **Create an issue**: Use appropriate templates: - - **Bugs:** "Bug Report" template. - - **Features:** "Detailed Feature Proposal" template. Approval required before starting. -- **Claim issues**: Comment and await official assignment. 
- -**PRs without approved issues may be closed.** +- **Create an issue** using: + - **Enhancements:** "Enhancement Request" template (plain language focused on user benefit). + - **Bugs:** "Bug Report" template (minimal repro + expected vs actual + version). +- **Want to work on it?** Comment "Claiming" on the issue and DM **Hannes Rudolph (`hrudolph`)** on [Discord](https://discord.gg/roocode) to get assigned. Assignment will be confirmed in the thread. +- **PRs must link to the issue.** Unlinked PRs may be closed. ### Deciding What to Work On -- Check the [GitHub Project](https://github.com/orgs/RooCodeInc/projects/1) for unassigned "Good First Issues." +- Check the [GitHub Project](https://github.com/orgs/RooCodeInc/projects/1) for "Issue [Unassigned]" issues. - For docs, visit [Roo Code Docs](https://github.com/RooCodeInc/Roo-Code-Docs). ### Reporting Bugs - Check for existing reports first. -- Create new bugs using the ["Bug Report" template](https://github.com/RooCodeInc/Roo-Code/issues/new/choose). +- Create a new bug using the ["Bug Report" template](https://github.com/RooCodeInc/Roo-Code/issues/new/choose) with: + - Clear, numbered reproduction steps + - Expected vs actual result + - Roo Code version (required); API provider/model if relevant - **Security issues**: Report privately via [security advisories](https://github.com/RooCodeInc/Roo-Code/security/advisories/new). ## Development & Submission Process @@ -118,13 +120,14 @@ pnpm install - Begin as a **Draft PR** if seeking early feedback. - Clearly describe your changes following the Pull Request Template. +- Link the issue in the PR description/title (e.g., "Fixes #123"). - Provide screenshots/videos for UI changes. - Indicate if documentation updates are necessary. ### Pull Request Policy -- Must reference pre-approved, assigned issues. -- PRs without adherence to the policy may be closed. +- Must reference an assigned GitHub Issue. 
To get assigned: comment "Claiming" on the issue and DM **Hannes Rudolph (`hrudolph`)** on [Discord](https://discord.gg/roocode). Assignment will be confirmed in the thread. +- Unlinked PRs may be closed. - PRs should pass CI tests, align with the roadmap, and have clear documentation. ### Review Process diff --git a/PRIVACY.md b/PRIVACY.md index 306d52f0590..02e8e151034 100644 --- a/PRIVACY.md +++ b/PRIVACY.md @@ -1,28 +1,28 @@ # Roo Code Privacy Policy -**Last Updated: June 10th, 2025** +**Last Updated: September 11th, 2025** Roo Code respects your privacy and is committed to transparency about how we handle your data. Below is a simple breakdown of where key pieces of data go—and, importantly, where they don’t. ### **Where Your Data Goes (And Where It Doesn’t)** -- **Code & Files**: Roo Code accesses files on your local machine when needed for AI-assisted features. When you send commands to Roo Code, relevant files may be transmitted to your chosen AI model provider (e.g., OpenAI, Anthropic, OpenRouter) to generate responses. We do not have access to this data, but AI providers may store it per their privacy policies. +- **Code & Files**: Roo Code accesses files on your local machine when needed for AI-assisted features. When you send commands to Roo Code, relevant files may be transmitted to your chosen AI model provider (e.g., OpenAI, Anthropic, OpenRouter) to generate responses. If you select Roo Code Cloud as the model provider (proxy mode), your code may transit Roo Code servers only to forward it to the upstream provider. We do not store your code; it is deleted immediately after forwarding. Otherwise, your code is sent directly to the provider. AI providers may store data per their privacy policies. - **Commands**: Any commands executed through Roo Code happen on your local environment. 
However, when you use AI-powered features, the relevant code and context from your commands may be transmitted to your chosen AI model provider (e.g., OpenAI, Anthropic, OpenRouter) to generate responses. We do not have access to or store this data, but AI providers may process it per their privacy policies. -- **Prompts & AI Requests**: When you use AI-powered features, your prompts and relevant project context are sent to your chosen AI model provider (e.g., OpenAI, Anthropic, OpenRouter) to generate responses. We do not store or process this data. These AI providers have their own privacy policies and may store data per their terms of service. +- **Prompts & AI Requests**: When you use AI-powered features, your prompts and relevant project context are sent to your chosen AI model provider (e.g., OpenAI, Anthropic, OpenRouter) to generate responses. We do not store or process this data. These AI providers have their own privacy policies and may store data per their terms of service. If you choose Roo Code Cloud as the provider (proxy mode), prompts may transit Roo Code servers only to forward them to the upstream model and are not stored. - **API Keys & Credentials**: If you enter an API key (e.g., to connect an AI model), it is stored locally on your device and never sent to us or any third party, except the provider you have chosen. -- **Telemetry (Usage Data)**: We only collect feature usage and error data if you explicitly opt-in. This telemetry is powered by PostHog and helps us understand feature usage to improve Roo Code. This includes your VS Code machine ID and feature usage patterns and exception reports. We do **not** collect personally identifiable information, your code, or AI prompts. -- **Marketplace Requests**: When you browse or search the Marketplace for Model Configuration Profiles (MCPs) or Custom Modes, Roo Code makes a secure API call to Roo Code’s backend servers to retrieve listing information. 
These requests send only the query parameters (e.g., extension version, search term) necessary to fulfill the request and do not include your code, prompts, or personally identifiable information. +- **Telemetry (Usage Data)**: We collect anonymous feature usage and error data to help us improve Roo Code. This telemetry is powered by PostHog and includes your VS Code machine ID, feature usage patterns, and exception reports. This telemetry does **not** collect personally identifiable information, your code, or AI prompts. You can opt out of this telemetry at any time through the settings. +- **Marketplace Requests**: When you browse or search the Marketplace for Model Configuration Profiles (MCPs) or Custom Modes, Roo Code makes a secure API call to Roo Code's backend servers to retrieve listing information. These requests send only the query parameters (e.g., extension version, search term) necessary to fulfill the request and do not include your code, prompts, or personally identifiable information. ### **How We Use Your Data (If Collected)** -- If you opt-in to telemetry, we use it to understand feature usage and improve Roo Code. +- We use telemetry to understand feature usage and improve Roo Code. - We do **not** sell or share your data. - We do **not** train any models on your data. ### **Your Choices & Control** - You can run models locally to prevent data being sent to third-parties. -- By default, telemetry collection is off and if you turn it on, you can opt out of telemetry at any time. +- Telemetry collection is enabled by default to help us improve Roo Code, but you can opt out at any time through the settings. - You can delete Roo Code to stop all data collection. ### **Security & Updates** diff --git a/README.md b/README.md index 08f8f818067..75f37762f93 100644 --- a/README.md +++ b/README.md @@ -1,124 +1,89 @@ -
- - -English • [Català](locales/ca/README.md) • [Deutsch](locales/de/README.md) • [Español](locales/es/README.md) • [Français](locales/fr/README.md) • [हिंदी](locales/hi/README.md) • [Bahasa Indonesia](locales/id/README.md) • [Italiano](locales/it/README.md) • [日本語](locales/ja/README.md) - - - - -[한국어](locales/ko/README.md) • [Nederlands](locales/nl/README.md) • [Polski](locales/pl/README.md) • [Português (BR)](locales/pt-BR/README.md) • [Русский](locales/ru/README.md) • [Türkçe](locales/tr/README.md) • [Tiếng Việt](locales/vi/README.md) • [简体中文](locales/zh-CN/README.md) • [繁體中文](locales/zh-TW/README.md) - - -
-
-
-

Roo Code

-

- -

-

Connect with developers, contribute ideas, and stay ahead with the latest AI-powered coding tools.

- - Join Discord - Join Reddit - -
-
-
- -
- -Download on VS Marketplace -Feature Requests -Rate & Review -Documentation - -
- -**Roo Code** is an AI-powered **autonomous coding agent** that lives in your editor. It can: - -- Communicate in natural language -- Read and write files directly in your workspace -- Run terminal commands -- Automate browser actions -- Integrate with any OpenAI-compatible or custom API/model -- Adapt its “personality” and capabilities through **Custom Modes** - -Whether you’re seeking a flexible coding partner, a system architect, or specialized roles like a QA engineer or product manager, Roo Code can help you build software more efficiently. - -Check out the [CHANGELOG](CHANGELOG.md) for detailed updates and fixes. +

+ VS Code Marketplace + X + YouTube + Join Discord + Join r/RooCode +

+

+ Get help fast → Join Discord • Prefer async? → Join r/RooCode +

+ +# Roo Code + +> Your AI-Powered Dev Team, Right in Your Editor + +
+ 🌐 Available languages + +- [English](README.md) +- [Català](locales/ca/README.md) +- [Deutsch](locales/de/README.md) +- [Español](locales/es/README.md) +- [Français](locales/fr/README.md) +- [हिंदी](locales/hi/README.md) +- [Bahasa Indonesia](locales/id/README.md) +- [Italiano](locales/it/README.md) +- [日本語](locales/ja/README.md) +- [한국어](locales/ko/README.md) +- [Nederlands](locales/nl/README.md) +- [Polski](locales/pl/README.md) +- [Português (BR)](locales/pt-BR/README.md) +- [Русский](locales/ru/README.md) +- [Türkçe](locales/tr/README.md) +- [Tiếng Việt](locales/vi/README.md) +- [简体中文](locales/zh-CN/README.md) +- [繁體中文](locales/zh-TW/README.md) +- ... +
--- -## 🎉 Roo Code 3.25 Released +## What Can Roo Code Do For YOU? -Roo Code 3.25 brings powerful new features and significant improvements to enhance your development workflow! +- Generate Code from natural language descriptions and specs +- Adapt with Modes: Code, Architect, Ask, Debug, and Custom Modes +- Refactor & Debug existing code +- Write & Update documentation +- Answer Questions about your codebase +- Automate repetitive tasks +- Utilize MCP Servers -- **Message Queueing** - Queue multiple messages while Roo is working, allowing you to continue planning your workflow without interruption. -- **Custom Slash Commands** - Create personalized slash commands for quick access to frequently used prompts and workflows, with full UI management. -- **Enhanced Gemini Tools** - New URL context and Google Search grounding capabilities provide Gemini models with real-time web information and enhanced research abilities. +## Modes ---- - -## What Can Roo Code Do? - -- 🚀 **Generate Code** from natural language descriptions -- 🔧 **Refactor & Debug** existing code -- 📝 **Write & Update** documentation -- 🤔 **Answer Questions** about your codebase -- 🔄 **Automate** repetitive tasks -- 🏗️ **Create** new files and projects - -## Quick Start - -1. [Install Roo Code](https://docs.roocode.com/getting-started/installing) -2. [Connect Your AI Provider](https://docs.roocode.com/getting-started/connecting-api-provider) -3. 
[Try Your First Task](https://docs.roocode.com/getting-started/your-first-task) - -## Key Features - -### Multiple Modes - -Roo Code adapts to your needs with specialized [modes](https://docs.roocode.com/basic-usage/using-modes): +Roo Code adapts to how you work: -- **Code Mode:** For general-purpose coding tasks -- **Architect Mode:** For planning and technical leadership -- **Ask Mode:** For answering questions and providing information -- **Debug Mode:** For systematic problem diagnosis -- **[Custom Modes](https://docs.roocode.com/advanced-usage/custom-modes):** Create unlimited specialized personas for security auditing, performance optimization, documentation, or any other task +- Code Mode: everyday coding, edits, and file ops +- Architect Mode: plan systems, specs, and migrations +- Ask Mode: fast answers, explanations, and docs +- Debug Mode: trace issues, add logs, isolate root causes +- Custom Modes: build specialized modes for your team or workflow +- Roomote Control: remotely control tasks running in your local VS Code instance -### Smart Tools +Learn more: [Using Modes](https://docs.roocode.com/basic-usage/using-modes) • [Custom Modes](https://docs.roocode.com/advanced-usage/custom-modes) • [Roomote Control](https://docs.roocode.com/roo-code-cloud/roomote-control) -Roo Code comes with powerful [tools](https://docs.roocode.com/basic-usage/how-tools-work) that can: +## Tutorial & Feature Videos -- Read and write files in your project -- Execute commands in your VS Code terminal -- Control a web browser -- Use external tools via [MCP (Model Context Protocol)](https://docs.roocode.com/advanced-usage/mcp) - -MCP extends Roo Code's capabilities by allowing you to add unlimited custom tools. Integrate with external APIs, connect to databases, or create specialized development tools - MCP provides the framework to expand Roo Code's functionality to meet your specific needs. - -### Customization +
-Make Roo Code work your way with: +| | | | +| :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +|
Installing Roo Code |
Configuring Profiles |
Codebase Indexing | +|
Custom Modes |
Checkpoints |
Context Management | -- [Custom Instructions](https://docs.roocode.com/advanced-usage/custom-instructions) for personalized behavior -- [Custom Modes](https://docs.roocode.com/advanced-usage/custom-modes) for specialized tasks -- [Local Models](https://docs.roocode.com/advanced-usage/local-models) for offline use -- [Auto-Approval Settings](https://docs.roocode.com/advanced-usage/auto-approving-actions) for faster workflows +
+

+More quick tutorial and feature videos... +

## Resources -### Documentation - -- [Basic Usage Guide](https://docs.roocode.com/basic-usage/the-chat-interface) -- [Advanced Features](https://docs.roocode.com/advanced-usage/auto-approving-actions) -- [Frequently Asked Questions](https://docs.roocode.com/faq) - -### Community - -- **Discord:** [Join our Discord server](https://discord.gg/roocode) for real-time help and discussions -- **Reddit:** [Visit our subreddit](https://www.reddit.com/r/RooCode) to share experiences and tips -- **GitHub:** Report [issues](https://github.com/RooCodeInc/Roo-Code/issues) or request [features](https://github.com/RooCodeInc/Roo-Code/discussions/categories/feature-requests?discussions_q=is%3Aopen+category%3A%22Feature+Requests%22+sort%3Atop) +- **[Documentation](https://docs.roocode.com):** The official guide to installing, configuring, and mastering Roo Code. +- **[YouTube Channel](https://youtube.com/@roocodeyt?feature=shared):** Watch tutorials and see features in action. +- **[Discord Server](https://discord.gg/roocode):** Join the community for real-time help and discussion. +- **[Reddit Community](https://www.reddit.com/r/RooCode):** Share your experiences and see what others are building. +- **[GitHub Issues](https://github.com/RooCodeInc/Roo-Code/issues):** Report bugs and track development. +- **[Feature Requests](https://github.com/RooCodeInc/Roo-Code/discussions/categories/feature-requests?discussions_q=is%3Aopen+category%3A%22Feature+Requests%22+sort%3Atop):** Have an idea? Share it with the developers. --- @@ -202,55 +167,6 @@ We love community contributions! Get started by reading our [CONTRIBUTING.md](CO --- -## Contributors - -Thanks to all our contributors who have helped make Roo Code better! - - - -| mrubens
mrubens
| saoudrizwan
saoudrizwan
| cte
cte
| daniel-lxs
daniel-lxs
| samhvw8
samhvw8
| hannesrudolph
hannesrudolph
| -| :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| KJ7LNW
KJ7LNW
| a8trejo
a8trejo
| ColemanRoo
ColemanRoo
| MuriloFP
MuriloFP
| canrobins13
canrobins13
| stea9499
stea9499
| -| joemanley201
joemanley201
| jr
jr
| System233
System233
| nissa-seru
nissa-seru
| jquanton
jquanton
| roomote-agent
roomote-agent
| -| NyxJae
NyxJae
| elianiva
elianiva
| d-oit
d-oit
| punkpeye
punkpeye
| wkordalski
wkordalski
| qdaxb
qdaxb
| -| xyOz-dev
xyOz-dev
| chrarnoldus
chrarnoldus
| SannidhyaSah
SannidhyaSah
| sachasayan
sachasayan
| Smartsheet-JB-Brown
Smartsheet-JB-Brown
| monotykamary
monotykamary
| -| cannuri
cannuri
| feifei325
feifei325
| zhangtony239
zhangtony239
| shariqriazz
shariqriazz
| vigneshsubbiah16
vigneshsubbiah16
| pugazhendhi-m
pugazhendhi-m
| -| lloydchang
lloydchang
| liwilliam2021
liwilliam2021
| dtrugman
dtrugman
| hassoncs
hassoncs
| Szpadel
Szpadel
| lupuletic
lupuletic
| -| kiwina
kiwina
| Premshay
Premshay
| psv2522
psv2522
| olweraltuve
olweraltuve
| diarmidmackenzie
diarmidmackenzie
| ChuKhaLi
ChuKhaLi
| -| PeterDaveHello
PeterDaveHello
| aheizi
aheizi
| nbihan-mediware
nbihan-mediware
| noritaka1166
noritaka1166
| RaySinner
RaySinner
| afshawnlotfi
afshawnlotfi
| -| dleffel
dleffel
| StevenTCramer
StevenTCramer
| Ruakij
Ruakij
| pdecat
pdecat
| kyle-apex
kyle-apex
| emshvac
emshvac
| -| Lunchb0ne
Lunchb0ne
| SmartManoj
SmartManoj
| vagadiya
vagadiya
| slytechnical
slytechnical
| dlab-anton
dlab-anton
| arthurauffray
arthurauffray
| -| upamune
upamune
| NamesMT
NamesMT
| taylorwilsdon
taylorwilsdon
| sammcj
sammcj
| p12tic
p12tic
| gtaylor
gtaylor
| -| brunobergher
brunobergher
| aitoroses
aitoroses
| ross
ross
| mr-ryan-james
mr-ryan-james
| heyseth
heyseth
| taisukeoe
taisukeoe
| -| avtc
avtc
| eonghk
eonghk
| GOODBOY008
GOODBOY008
| kcwhite
kcwhite
| ronyblum
ronyblum
| teddyOOXX
teddyOOXX
| -| thill2323
thill2323
| vincentsong
vincentsong
| yongjer
yongjer
| zeozeozeo
zeozeozeo
| ashktn
ashktn
| franekp
franekp
| -| yt3trees
yt3trees
| seedlord
seedlord
| axkirillov
axkirillov
| anton-otee
anton-otee
| benzntech
benzntech
| bramburn
bramburn
| -| olearycrew
olearycrew
| catrielmuller
catrielmuller
| devxpain
devxpain
| snoyiatk
snoyiatk
| GitlyHallows
GitlyHallows
| philfung
philfung
| -| napter
napter
| mdp
mdp
| SplittyDev
SplittyDev
| jcbdev
jcbdev
| Chenjiayuan195
Chenjiayuan195
| julionav
julionav
| -| KanTakahiro
KanTakahiro
| kevint-cerebras
kevint-cerebras
| cdlliuy
cdlliuy
| im47cn
im47cn
| hongzio
hongzio
| hatsu38
hatsu38
| -| forestyoo
forestyoo
| janaki-sasidhar
janaki-sasidhar
| dqroid
dqroid
| dairui1
dairui1
| chris-garrett
chris-garrett
| bbenshalom
bbenshalom
| -| bannzai
bannzai
| axmo
axmo
| asychin
asychin
| amittell
amittell
| nevermorec
nevermorec
| Yoshino-Yukitaro
Yoshino-Yukitaro
| -| Yikai-Liao
Yikai-Liao
| zxdvd
zxdvd
| s97712
s97712
| vladstudio
vladstudio
| vivekfyi
vivekfyi
| HahaBill
HahaBill
| -| tmsjngx0
tmsjngx0
| TGlide
TGlide
| Githubguy132010
Githubguy132010
| tgfjt
tgfjt
| maekawataiki
maekawataiki
| AlexandruSmirnov
AlexandruSmirnov
| -| user202729
user202729
| takakoutso
takakoutso
| student20880
student20880
| shubhamgupta731
shubhamgupta731
| shohei-ihaya
shohei-ihaya
| shivamd1810
shivamd1810
| -| shaybc
shaybc
| sensei-woo
sensei-woo
| samir-nimbly
samir-nimbly
| zetaloop
zetaloop
| robertheadley
robertheadley
| refactorthis
refactorthis
| -| qingyuan1109
qingyuan1109
| pokutuna
pokutuna
| philipnext
philipnext
| village-way
village-way
| oprstchn
oprstchn
| nobu007
nobu007
| -| mosleyit
mosleyit
| moqimoqidea
moqimoqidea
| mlopezr
mlopezr
| mecab
mecab
| olup
olup
| lightrabbit
lightrabbit
| -| lhish
lhish
| kohii
kohii
| PretzelVector
PretzelVector
| kinandan
kinandan
| jwcraig
jwcraig
| shoopapa
shoopapa
| -| samsilveira
samsilveira
| pfitz
pfitz
| ExactDoug
ExactDoug
| celestial-vault
celestial-vault
| linegel
linegel
| edwin-truthsearch-io
edwin-truthsearch-io
| -| EamonNerbonne
EamonNerbonne
| dbasclpy
dbasclpy
| dflatline
dflatline
| Deon588
Deon588
| dleen
dleen
| CW-B-W
CW-B-W
| -| chadgauth
chadgauth
| thecolorblue
thecolorblue
| bogdan0083
bogdan0083
| benashby
benashby
| Atlogit
Atlogit
| atlasgong
atlasgong
| -| AntiMoron
AntiMoron
| andrewshu2000
andrewshu2000
| andreastempsch
andreastempsch
| alasano
alasano
| QuinsZouls
QuinsZouls
| HadesArchitect
HadesArchitect
| -| alarno
alarno
| nexon33
nexon33
| adilhafeez
adilhafeez
| adamwlarson
adamwlarson
| adamhill
adamhill
| AMHesch
AMHesch
| -| adambrand
adambrand
| 01Rian
01Rian
| RSO
RSO
| RandalSchwartz
RandalSchwartz
| SECKainersdorfer
SECKainersdorfer
| R-omk
R-omk
| -| pwilkin
pwilkin
| Sarke
Sarke
| PaperBoardOfficial
PaperBoardOfficial
| OlegOAndreev
OlegOAndreev
| Naam
Naam
| NaccOll
NaccOll
| -| kvokka
kvokka
| ecmasx
ecmasx
| mollux
mollux
| marvijo-code
marvijo-code
| markijbema
markijbema
| mamertofabian
mamertofabian
| -| monkeyDluffy6017
monkeyDluffy6017
| libertyteeth
libertyteeth
| shtse8
shtse8
| Rexarrior
Rexarrior
| kevinvandijk
kevinvandijk
| KevinZhao
KevinZhao
| -| ksze
ksze
| Juice10
Juice10
| snova-jorgep
snova-jorgep
| Fovty
Fovty
| Jdo300
Jdo300
| hesara
hesara
| -| DeXtroTip
DeXtroTip
| | | | | | - - - ## License [Apache 2.0 © 2025 Roo Code, Inc.](./LICENSE) diff --git a/apps/vscode-e2e/package.json b/apps/vscode-e2e/package.json index 1d19ffebf2a..d366f72a2d3 100644 --- a/apps/vscode-e2e/package.json +++ b/apps/vscode-e2e/package.json @@ -18,7 +18,7 @@ "@types/vscode": "^1.95.0", "@vscode/test-cli": "^0.0.11", "@vscode/test-electron": "^2.4.0", - "glob": "^11.0.1", + "glob": "^11.1.0", "mocha": "^11.1.0", "rimraf": "^6.0.1", "typescript": "5.8.3" diff --git a/apps/vscode-e2e/src/runTest.ts b/apps/vscode-e2e/src/runTest.ts index 2e8b262a490..2bec946b4ab 100644 --- a/apps/vscode-e2e/src/runTest.ts +++ b/apps/vscode-e2e/src/runTest.ts @@ -38,6 +38,7 @@ async function main() { extensionTestsPath, launchArgs: [testWorkspace], extensionTestsEnv, + version: process.env.VSCODE_VERSION || "1.101.2", }) // Clean up the temporary workspace diff --git a/apps/vscode-e2e/src/suite/extension.test.ts b/apps/vscode-e2e/src/suite/extension.test.ts index e7a92521cf6..c5340a882d6 100644 --- a/apps/vscode-e2e/src/suite/extension.test.ts +++ b/apps/vscode-e2e/src/suite/extension.test.ts @@ -15,16 +15,10 @@ suite("Roo Code Extension", function () { "SidebarProvider.removeView", "activationCompleted", "plusButtonClicked", - "mcpButtonClicked", - "promptsButtonClicked", "popoutButtonClicked", "openInNewTab", "settingsButtonClicked", "historyButtonClicked", - "showHumanRelayDialog", - "registerHumanRelayCallback", - "unregisterHumanRelayCallback", - "handleHumanRelayResponse", "newTask", "setCustomStoragePath", "focusInput", diff --git a/apps/vscode-e2e/src/suite/tools/apply-diff.test.ts b/apps/vscode-e2e/src/suite/tools/apply-diff.test.ts index 729d6839b19..c4f279f5f6d 100644 --- a/apps/vscode-e2e/src/suite/tools/apply-diff.test.ts +++ b/apps/vscode-e2e/src/suite/tools/apply-diff.test.ts @@ -8,7 +8,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" 
-suite("Roo Code apply_diff Tool", function () { +suite.skip("Roo Code apply_diff Tool", function () { setDefaultSuiteTimeout(this) let workspaceDir: string diff --git a/apps/vscode-e2e/src/suite/tools/execute-command.test.ts b/apps/vscode-e2e/src/suite/tools/execute-command.test.ts index f207dae685c..3dbfb709348 100644 --- a/apps/vscode-e2e/src/suite/tools/execute-command.test.ts +++ b/apps/vscode-e2e/src/suite/tools/execute-command.test.ts @@ -8,7 +8,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep, waitUntilCompleted } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" -suite("Roo Code execute_command Tool", function () { +suite.skip("Roo Code execute_command Tool", function () { setDefaultSuiteTimeout(this) let workspaceDir: string diff --git a/apps/vscode-e2e/src/suite/tools/insert-content.test.ts b/apps/vscode-e2e/src/suite/tools/insert-content.test.ts deleted file mode 100644 index 4dd0c209280..00000000000 --- a/apps/vscode-e2e/src/suite/tools/insert-content.test.ts +++ /dev/null @@ -1,628 +0,0 @@ -import * as assert from "assert" -import * as fs from "fs/promises" -import * as path from "path" -import * as vscode from "vscode" - -import { RooCodeEventName, type ClineMessage } from "@roo-code/types" - -import { waitFor, sleep } from "../utils" -import { setDefaultSuiteTimeout } from "../test-utils" - -suite("Roo Code insert_content Tool", function () { - setDefaultSuiteTimeout(this) - - let workspaceDir: string - - // Pre-created test files that will be used across tests - const testFiles = { - simpleText: { - name: `test-insert-simple-${Date.now()}.txt`, - content: "Line 1\nLine 2\nLine 3", - path: "", - }, - jsFile: { - name: `test-insert-js-${Date.now()}.js`, - content: `function hello() { - console.log("Hello World") -} - -function goodbye() { - console.log("Goodbye World") -}`, - path: "", - }, - emptyFile: { - name: `test-insert-empty-${Date.now()}.txt`, - content: "", - path: "", - 
}, - pythonFile: { - name: `test-insert-python-${Date.now()}.py`, - content: `def main(): - print("Start") - print("End")`, - path: "", - }, - } - - // Get the actual workspace directory that VSCode is using and create all test files - suiteSetup(async function () { - // Get the workspace folder from VSCode - const workspaceFolders = vscode.workspace.workspaceFolders - if (!workspaceFolders || workspaceFolders.length === 0) { - throw new Error("No workspace folder found") - } - workspaceDir = workspaceFolders[0]!.uri.fsPath - console.log("Using workspace directory:", workspaceDir) - - // Create all test files before any tests run - console.log("Creating test files in workspace...") - for (const [key, file] of Object.entries(testFiles)) { - file.path = path.join(workspaceDir, file.name) - await fs.writeFile(file.path, file.content) - console.log(`Created ${key} test file at:`, file.path) - } - - // Verify all files exist - for (const [key, file] of Object.entries(testFiles)) { - const exists = await fs - .access(file.path) - .then(() => true) - .catch(() => false) - if (!exists) { - throw new Error(`Failed to create ${key} test file at ${file.path}`) - } - } - }) - - // Clean up after all tests - suiteTeardown(async () => { - // Cancel any running tasks before cleanup - test("Should insert content at the beginning of a file (line 1)", async function () { - const api = globalThis.api - // Clean up before each test - setup(async () => { - // Cancel any previous task - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Small delay to ensure clean state - await sleep(100) - }) - - // Clean up after each test - teardown(async () => { - // Cancel the current task - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Small delay to ensure clean state - await sleep(100) - }) - const messages: ClineMessage[] = [] - const testFile = testFiles.simpleText - const insertContent 
= "New first line" - const expectedContent = `${insertContent} -${testFile.content}` - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let insertContentExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("insert_content")) { - insertContentExecuted = true - console.log("insert_content tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start the task - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: 
true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use insert_content to add "${insertContent}" at line 1 (beginning) of the file ${testFile.name}. The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after insertion:", actualContent) - - // Verify tool was executed - assert.strictEqual(insertContentExecuted, true, "insert_content tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "Content should be inserted at the beginning of the file", - ) - - // Verify no errors occurred - assert.strictEqual( - errorOccurred, - null, - `Task should complete without errors, but got: ${errorOccurred}`, - ) - - console.log("Test passed! 
insert_content tool executed and content inserted at beginning successfully") - } finally { - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Clean up all test files - console.log("Cleaning up test files...") - for (const [key, file] of Object.entries(testFiles)) { - try { - await fs.unlink(file.path) - console.log(`Cleaned up ${key} test file`) - } catch (error) { - console.log(`Failed to clean up ${key} test file:`, error) - } - } - }) - - test("Should insert content at the end of a file (line 0)", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.simpleText - const insertContent = "New last line" - const expectedContent = `${testFile.content} -${insertContent}` - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let insertContentExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && 
requestData.request.includes("insert_content")) { - insertContentExecuted = true - console.log("insert_content tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start the task - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use insert_content to add "${insertContent}" at line 0 (end of file) of the file ${testFile.name}. 
The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after insertion:", actualContent) - - // Verify tool was executed - test("Should insert multiline content into a JavaScript file", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.jsFile - const insertContent = `// New import statements -import { utils } from './utils' -import { helpers } from './helpers'` - const expectedContent = `${insertContent} -${testFile.content}` - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let insertContentExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && 
message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("insert_content")) { - insertContentExecuted = true - console.log("insert_content tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start the task - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use insert_content to add import statements at the beginning (line 1) of the JavaScript file ${testFile.name}. 
Add these lines: -${insertContent} - -The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - test("Should insert content into an empty file", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.emptyFile - const insertContent = `# My New File -This is the first line of content -And this is the second line` - const expectedContent = insertContent - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let insertContentExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if ( - message.type === "say" && - (message.say === "completion_result" || message.say === "text") - ) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && 
requestData.request.includes("insert_content")) { - insertContentExecuted = true - console.log("insert_content tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start the task - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use insert_content to add content to the empty file ${testFile.name}. Add this content at line 0 (end of file): -${insertContent} - -The file is currently empty. 
Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after insertion:", actualContent) - - // Verify tool was executed - assert.strictEqual( - insertContentExecuted, - true, - "insert_content tool should have been executed", - ) - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "Content should be inserted into the empty file", - ) - - // Verify no errors occurred - assert.strictEqual( - errorOccurred, - null, - `Task should complete without errors, but got: ${errorOccurred}`, - ) - - console.log( - "Test passed! 
insert_content tool executed and content inserted into empty file successfully", - ) - } finally { - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after insertion:", actualContent) - - // Verify tool was executed - assert.strictEqual(insertContentExecuted, true, "insert_content tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "Multiline content should be inserted at the beginning of the JavaScript file", - ) - - // Verify no errors occurred - assert.strictEqual( - errorOccurred, - null, - `Task should complete without errors, but got: ${errorOccurred}`, - ) - - console.log("Test passed! insert_content tool executed and multiline content inserted successfully") - } finally { - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - assert.strictEqual(insertContentExecuted, true, "insert_content tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "Content should be inserted at the end of the file", - ) - - // Verify no errors occurred - assert.strictEqual(errorOccurred, null, `Task should complete without errors, but got: ${errorOccurred}`) - - console.log("Test passed! 
insert_content tool executed and content inserted at end successfully") - } finally { - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - // Tests will be added here one by one -}) diff --git a/apps/vscode-e2e/src/suite/tools/list-files.test.ts b/apps/vscode-e2e/src/suite/tools/list-files.test.ts index 5a1fd6cc3be..386433e7b8a 100644 --- a/apps/vscode-e2e/src/suite/tools/list-files.test.ts +++ b/apps/vscode-e2e/src/suite/tools/list-files.test.ts @@ -8,7 +8,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" -suite("Roo Code list_files Tool", function () { +suite.skip("Roo Code list_files Tool", function () { setDefaultSuiteTimeout(this) let workspaceDir: string diff --git a/apps/vscode-e2e/src/suite/tools/read-file.test.ts b/apps/vscode-e2e/src/suite/tools/read-file.test.ts index 99e3f184577..00aca7f58ab 100644 --- a/apps/vscode-e2e/src/suite/tools/read-file.test.ts +++ b/apps/vscode-e2e/src/suite/tools/read-file.test.ts @@ -9,7 +9,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" -suite("Roo Code read_file Tool", function () { +suite.skip("Roo Code read_file Tool", function () { setDefaultSuiteTimeout(this) let tempDir: string diff --git a/apps/vscode-e2e/src/suite/tools/search-and-replace.test.ts b/apps/vscode-e2e/src/suite/tools/search-and-replace.test.ts deleted file mode 100644 index 801a829a74b..00000000000 --- a/apps/vscode-e2e/src/suite/tools/search-and-replace.test.ts +++ /dev/null @@ -1,631 +0,0 @@ -import * as assert from "assert" -import * as fs from "fs/promises" -import * as path from "path" -import * as vscode from "vscode" - -import { RooCodeEventName, type ClineMessage } from 
"@roo-code/types" - -import { waitFor, sleep } from "../utils" -import { setDefaultSuiteTimeout } from "../test-utils" - -suite("Roo Code search_and_replace Tool", function () { - setDefaultSuiteTimeout(this) - - let workspaceDir: string - - // Pre-created test files that will be used across tests - const testFiles = { - simpleReplace: { - name: `test-simple-replace-${Date.now()}.txt`, - content: "Hello World\nThis is a test file\nWith multiple lines\nHello again", - path: "", - }, - regexReplace: { - name: `test-regex-replace-${Date.now()}.js`, - content: `function oldFunction() { - console.log("old implementation") - return "old result" -} - -function anotherOldFunction() { - console.log("another old implementation") - return "another old result" -}`, - path: "", - }, - caseInsensitive: { - name: `test-case-insensitive-${Date.now()}.txt`, - content: `Hello World -HELLO UNIVERSE -hello everyone -HeLLo ThErE`, - path: "", - }, - multipleMatches: { - name: `test-multiple-matches-${Date.now()}.txt`, - content: `TODO: Fix this bug -This is some content -TODO: Add more tests -Some more content -TODO: Update documentation -Final content`, - path: "", - }, - noMatches: { - name: `test-no-matches-${Date.now()}.txt`, - content: "This file has no matching patterns\nJust regular content\nNothing special here", - path: "", - }, - } - - // Get the actual workspace directory that VSCode is using and create all test files - suiteSetup(async function () { - // Get the workspace folder from VSCode - const workspaceFolders = vscode.workspace.workspaceFolders - if (!workspaceFolders || workspaceFolders.length === 0) { - throw new Error("No workspace folder found") - } - workspaceDir = workspaceFolders[0]!.uri.fsPath - console.log("Using workspace directory:", workspaceDir) - - // Create all test files before any tests run - console.log("Creating test files in workspace...") - for (const [key, file] of Object.entries(testFiles)) { - file.path = path.join(workspaceDir, file.name) - 
await fs.writeFile(file.path, file.content) - console.log(`Created ${key} test file at:`, file.path) - } - - // Verify all files exist - for (const [key, file] of Object.entries(testFiles)) { - const exists = await fs - .access(file.path) - .then(() => true) - .catch(() => false) - if (!exists) { - throw new Error(`Failed to create ${key} test file at ${file.path}`) - } - } - }) - - // Clean up after all tests - suiteTeardown(async () => { - // Cancel any running tasks before cleanup - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Clean up all test files - console.log("Cleaning up test files...") - for (const [key, file] of Object.entries(testFiles)) { - try { - await fs.unlink(file.path) - console.log(`Cleaned up ${key} test file`) - } catch (error) { - console.log(`Failed to clean up ${key} test file:`, error) - } - } - }) - - // Clean up before each test - setup(async () => { - // Cancel any previous task - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Small delay to ensure clean state - await sleep(100) - }) - - // Clean up after each test - teardown(async () => { - // Cancel the current task - try { - await globalThis.api.cancelCurrentTask() - } catch { - // Task might not be running - } - - // Small delay to ensure clean state - await sleep(100) - }) - - test("Should perform simple text replacement", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.simpleReplace - const expectedContent = "Hello Universe\nThis is a test file\nWith multiple lines\nHello again" - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let searchReplaceExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" 
&& message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("search_and_replace")) { - searchReplaceExecuted = true - console.log("search_and_replace tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start task with search_and_replace instruction - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use search_and_replace on the file ${testFile.name} to replace "Hello World" with "Hello Universe". 
- -The file is located at: ${testFile.path} - -The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after modification:", actualContent) - - // Verify tool was executed - assert.strictEqual(searchReplaceExecuted, true, "search_and_replace tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "File content should be modified correctly", - ) - - console.log("Test passed! 
search_and_replace tool executed and file modified successfully") - } finally { - // Clean up - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - - test("Should perform regex pattern replacement", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.regexReplace - const expectedContent = `function newFunction() { - console.log("new implementation") - return "new result" -} - -function anotherNewFunction() { - console.log("another new implementation") - return "another new result" -}` - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let searchReplaceExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("search_and_replace")) { - searchReplaceExecuted = true - console.log("search_and_replace tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for 
task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start task with search_and_replace instruction - simpler and more direct - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use search_and_replace on the file ${testFile.name} to: -1. First, replace "old" with "new" (use_regex: false) -2. Then, replace "Old" with "New" (use_regex: false) - -The file is located at: ${testFile.path} - -Assume the file exists and you can modify it directly. - -Use the search_and_replace tool twice - once for each replacement.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 90_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 90_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after modification:", actualContent) - - // Verify tool was executed - assert.strictEqual(searchReplaceExecuted, true, "search_and_replace tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "File content should be modified with regex replacement", - ) - - 
console.log("Test passed! search_and_replace tool executed with regex successfully") - } finally { - // Clean up - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - - test("Should replace multiple matches in file", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.multipleMatches - const expectedContent = `DONE: Fix this bug -This is some content -DONE: Add more tests -Some more content -DONE: Update documentation -Final content` - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let searchReplaceExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("search_and_replace")) { - searchReplaceExecuted = true - console.log("search_and_replace tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: 
string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } - } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start task with search_and_replace instruction for multiple matches - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use search_and_replace on the file ${testFile.name} to replace all occurrences of "TODO" with "DONE". - -The file is located at: ${testFile.path} - -The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file was modified correctly - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after modification:", actualContent) - - // Verify tool was executed - assert.strictEqual(searchReplaceExecuted, true, "search_and_replace tool should have been executed") - - // Verify file content - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "All TODO occurrences should be replaced with DONE", - ) - - console.log("Test passed! 
search_and_replace tool executed and replaced multiple matches successfully") - } finally { - // Clean up - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) - - test("Should handle case when no matches are found", async function () { - const api = globalThis.api - const messages: ClineMessage[] = [] - const testFile = testFiles.noMatches - const expectedContent = testFile.content // Should remain unchanged - let taskStarted = false - let taskCompleted = false - let errorOccurred: string | null = null - let searchReplaceExecuted = false - - // Listen for messages - const messageHandler = ({ message }: { message: ClineMessage }) => { - messages.push(message) - - // Log important messages for debugging - if (message.type === "say" && message.say === "error") { - errorOccurred = message.text || "Unknown error" - console.error("Error:", message.text) - } - if (message.type === "ask" && message.ask === "tool") { - console.log("Tool request:", message.text?.substring(0, 200)) - } - if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { - console.log("AI response:", message.text?.substring(0, 200)) - } - - // Check for tool execution - if (message.type === "say" && message.say === "api_req_started" && message.text) { - console.log("API request started:", message.text.substring(0, 200)) - try { - const requestData = JSON.parse(message.text) - if (requestData.request && requestData.request.includes("search_and_replace")) { - searchReplaceExecuted = true - console.log("search_and_replace tool executed!") - } - } catch (e) { - console.log("Failed to parse api_req_started message:", e) - } - } - } - api.on(RooCodeEventName.Message, messageHandler) - - // Listen for task events - const taskStartedHandler = (id: string) => { - if (id === taskId) { - taskStarted = true - console.log("Task started:", id) - } 
- } - api.on(RooCodeEventName.TaskStarted, taskStartedHandler) - - const taskCompletedHandler = (id: string) => { - if (id === taskId) { - taskCompleted = true - console.log("Task completed:", id) - } - } - api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) - - let taskId: string - try { - // Start task with search_and_replace instruction for pattern that won't match - taskId = await api.startNewTask({ - configuration: { - mode: "code", - autoApprovalEnabled: true, - alwaysAllowWrite: true, - alwaysAllowReadOnly: true, - alwaysAllowReadOnlyOutsideWorkspace: true, - }, - text: `Use search_and_replace on the file ${testFile.name} to replace "NONEXISTENT_PATTERN" with "REPLACEMENT". This pattern should not be found in the file. - -The file is located at: ${testFile.path} - -The file already exists with this content: -${testFile.content} - -Assume the file exists and you can modify it directly.`, - }) - - console.log("Task ID:", taskId) - console.log("Test filename:", testFile.name) - - // Wait for task to start - await waitFor(() => taskStarted, { timeout: 45_000 }) - - // Check for early errors - if (errorOccurred) { - console.error("Early error detected:", errorOccurred) - } - - // Wait for task completion - await waitFor(() => taskCompleted, { timeout: 45_000 }) - - // Give extra time for file system operations - await sleep(2000) - - // Check if the file remains unchanged - const actualContent = await fs.readFile(testFile.path, "utf-8") - console.log("File content after search (should be unchanged):", actualContent) - - // Verify tool was executed - assert.strictEqual(searchReplaceExecuted, true, "search_and_replace tool should have been executed") - - // Verify file content remains unchanged - assert.strictEqual( - actualContent.trim(), - expectedContent.trim(), - "File content should remain unchanged when no matches are found", - ) - - console.log("Test passed! 
search_and_replace tool executed and handled no matches correctly") - } finally { - // Clean up - api.off(RooCodeEventName.Message, messageHandler) - api.off(RooCodeEventName.TaskStarted, taskStartedHandler) - api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) - } - }) -}) diff --git a/apps/vscode-e2e/src/suite/tools/search-files.test.ts b/apps/vscode-e2e/src/suite/tools/search-files.test.ts index 98cfd1b3eed..2b54df3f048 100644 --- a/apps/vscode-e2e/src/suite/tools/search-files.test.ts +++ b/apps/vscode-e2e/src/suite/tools/search-files.test.ts @@ -8,7 +8,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" -suite("Roo Code search_files Tool", function () { +suite.skip("Roo Code search_files Tool", function () { setDefaultSuiteTimeout(this) let workspaceDir: string diff --git a/apps/vscode-e2e/src/suite/tools/write-to-file.test.ts b/apps/vscode-e2e/src/suite/tools/write-to-file.test.ts index dea51386cf9..fee15add17b 100644 --- a/apps/vscode-e2e/src/suite/tools/write-to-file.test.ts +++ b/apps/vscode-e2e/src/suite/tools/write-to-file.test.ts @@ -8,7 +8,7 @@ import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitFor, sleep } from "../utils" import { setDefaultSuiteTimeout } from "../test-utils" -suite("Roo Code write_to_file Tool", function () { +suite.skip("Roo Code write_to_file Tool", function () { setDefaultSuiteTimeout(this) let tempDir: string diff --git a/apps/web-docs/.gitkeep b/apps/web-docs/.gitkeep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/apps/web-docs/README.md b/apps/web-docs/README.md deleted file mode 100644 index 1333ed77b7e..00000000000 --- a/apps/web-docs/README.md +++ /dev/null @@ -1 +0,0 @@ -TODO diff --git a/apps/web-evals/.env b/apps/web-evals/.env index 7970806becc..1bb6dd6dacb 100644 --- a/apps/web-evals/.env +++ b/apps/web-evals/.env @@ -1 +1 @@ 
-DATABASE_URL=postgres://postgres:password@localhost:5432/evals_development +DATABASE_URL=postgres://postgres:password@localhost:5433/evals_development diff --git a/apps/web-evals/package.json b/apps/web-evals/package.json index df8efec1158..9ba2c98c2c9 100644 --- a/apps/web-evals/package.json +++ b/apps/web-evals/package.json @@ -5,15 +5,16 @@ "scripts": { "lint": "next lint --max-warnings 0", "check-types": "tsc -b", - "dev": "scripts/check-services.sh && next dev", + "dev": "scripts/check-services.sh && next dev -p 3446", "format": "prettier --write src", "build": "next build", - "start": "next start", + "start": "next start -p 3446", "clean": "rimraf tsconfig.tsbuildinfo .next .turbo" }, "dependencies": { "@hookform/resolvers": "^5.1.1", "@radix-ui/react-alert-dialog": "^1.1.7", + "@radix-ui/react-checkbox": "^1.1.5", "@radix-ui/react-dialog": "^1.1.6", "@radix-ui/react-dropdown-menu": "^2.1.7", "@radix-ui/react-label": "^2.1.2", @@ -24,16 +25,17 @@ "@radix-ui/react-slider": "^1.2.4", "@radix-ui/react-slot": "^1.1.2", "@radix-ui/react-tabs": "^1.1.3", - "@radix-ui/react-tooltip": "^1.1.8", + "@radix-ui/react-tooltip": "^1.2.8", "@roo-code/evals": "workspace:^", "@roo-code/types": "workspace:^", "@tanstack/react-query": "^5.69.0", + "archiver": "^7.0.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^1.1.0", "fuzzysort": "^3.1.0", "lucide-react": "^0.518.0", - "next": "^15.2.5", + "next": "~15.2.8", "next-themes": "^0.4.6", "p-map": "^7.0.3", "react": "^18.3.1", @@ -51,6 +53,7 @@ "@roo-code/config-eslint": "workspace:^", "@roo-code/config-typescript": "workspace:^", "@tailwindcss/postcss": "^4", + "@types/archiver": "^7.0.0", "@types/ps-tree": "^1.1.6", "@types/react": "^18.3.23", "@types/react-dom": "^18.3.5", diff --git a/apps/web-evals/scripts/check-services.sh b/apps/web-evals/scripts/check-services.sh index 104a4722088..d72ffd54e8d 100755 --- a/apps/web-evals/scripts/check-services.sh +++ b/apps/web-evals/scripts/check-services.sh @@ 
-5,13 +5,13 @@ if ! docker info &> /dev/null; then exit 1 fi -if ! nc -z localhost 5432 2>/dev/null; then +if ! nc -z postgres 5433 2>/dev/null; then echo "❌ PostgreSQL is not running on port 5432" echo "💡 Start it with: pnpm --filter @roo-code/evals db:up" exit 1 fi -if ! nc -z localhost 6379 2>/dev/null; then +if ! nc -z redis 6380 2>/dev/null; then echo "❌ Redis is not running on port 6379" echo "💡 Start it with: pnpm --filter @roo-code/evals redis:up" exit 1 diff --git a/apps/web-evals/src/actions/__tests__/killRun.spec.ts b/apps/web-evals/src/actions/__tests__/killRun.spec.ts new file mode 100644 index 00000000000..814d70d9fca --- /dev/null +++ b/apps/web-evals/src/actions/__tests__/killRun.spec.ts @@ -0,0 +1,207 @@ +// npx vitest run src/actions/__tests__/killRun.spec.ts + +import { execFileSync } from "child_process" + +// Mock child_process +vi.mock("child_process", () => ({ + execFileSync: vi.fn(), + spawn: vi.fn(), +})) + +// Mock next/cache +vi.mock("next/cache", () => ({ + revalidatePath: vi.fn(), +})) + +// Mock redis client +vi.mock("@/lib/server/redis", () => ({ + redisClient: vi.fn().mockResolvedValue({ + del: vi.fn().mockResolvedValue(1), + }), +})) + +// Mock @roo-code/evals +vi.mock("@roo-code/evals", () => ({ + createRun: vi.fn(), + deleteRun: vi.fn(), + createTask: vi.fn(), + exerciseLanguages: [], + getExercisesForLanguage: vi.fn().mockResolvedValue([]), +})) + +// Mock timers to speed up tests +vi.useFakeTimers() + +// Import after mocks +import { killRun } from "../runs" + +const mockExecFileSync = execFileSync as ReturnType<typeof vi.fn> + +describe("killRun", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + afterEach(() => { + vi.clearAllTimers() + }) + + it("should kill controller first, wait, then kill task containers", async () => { + const runId = 123 + + // execFileSync is used for all docker commands + mockExecFileSync + .mockReturnValueOnce("") // docker kill controller +
.mockReturnValueOnce("evals-task-123-456.0\nevals-task-123-789.1\n") // docker ps + .mockReturnValueOnce("") // docker kill evals-task-123-456.0 + .mockReturnValueOnce("") // docker kill evals-task-123-789.1 + + const resultPromise = killRun(runId) + + // Fast-forward past the 10 second sleep + await vi.advanceTimersByTimeAsync(10000) + + const result = await resultPromise + + expect(result.success).toBe(true) + expect(result.killedContainers).toContain("evals-controller-123") + expect(result.killedContainers).toContain("evals-task-123-456.0") + expect(result.killedContainers).toContain("evals-task-123-789.1") + expect(result.errors).toHaveLength(0) + + // Verify execFileSync was called for docker kill + expect(mockExecFileSync).toHaveBeenNthCalledWith( + 1, + "docker", + ["kill", "evals-controller-123"], + expect.any(Object), + ) + // Verify execFileSync was called for docker ps with run-specific filter + expect(mockExecFileSync).toHaveBeenNthCalledWith( + 2, + "docker", + ["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-123-"], + expect.any(Object), + ) + }) + + it("should continue killing runners even if controller is not running", async () => { + const runId = 456 + + mockExecFileSync + .mockImplementationOnce(() => { + throw new Error("No such container") + }) // controller kill fails + .mockReturnValueOnce("evals-task-456-100.0\n") // docker ps + .mockReturnValueOnce("") // docker kill task + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + const result = await resultPromise + + expect(result.success).toBe(true) + expect(result.killedContainers).toContain("evals-task-456-100.0") + // Controller not in list since it failed + expect(result.killedContainers).not.toContain("evals-controller-456") + }) + + it("should clear Redis state after killing containers", async () => { + const runId = 789 + + const mockDel = vi.fn().mockResolvedValue(1) + const { redisClient } = await import("@/lib/server/redis") + 
vi.mocked(redisClient).mockResolvedValue({ del: mockDel } as never) + + mockExecFileSync + .mockReturnValueOnce("") // controller kill + .mockReturnValueOnce("") // docker ps (no tasks) + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + await resultPromise + + expect(mockDel).toHaveBeenCalledWith("heartbeat:789") + expect(mockDel).toHaveBeenCalledWith("runners:789") + }) + + it("should handle docker ps failure gracefully", async () => { + const runId = 111 + + mockExecFileSync + .mockReturnValueOnce("") // controller kill succeeds + .mockImplementationOnce(() => { + throw new Error("Docker error") + }) // docker ps fails + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + const result = await resultPromise + + // Should still be successful because controller was killed + expect(result.success).toBe(true) + expect(result.killedContainers).toContain("evals-controller-111") + expect(result.errors).toContain("Failed to list Docker task containers") + }) + + it("should handle individual task kill failures", async () => { + const runId = 222 + + mockExecFileSync + .mockReturnValueOnce("") // controller kill + .mockReturnValueOnce("evals-task-222-300.0\nevals-task-222-400.0\n") // docker ps + .mockImplementationOnce(() => { + throw new Error("Kill failed") + }) // first task kill fails + .mockReturnValueOnce("") // second task kill succeeds + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + const result = await resultPromise + + expect(result.success).toBe(true) + expect(result.killedContainers).toContain("evals-controller-222") + expect(result.killedContainers).toContain("evals-task-222-400.0") + expect(result.errors.length).toBe(1) + expect(result.errors[0]).toContain("evals-task-222-300.0") + }) + + it("should return success with no containers when nothing is running", async () => { + const runId = 333 + + mockExecFileSync + .mockImplementationOnce(() => { + throw 
new Error("No such container") + }) // controller not running + .mockReturnValueOnce("") // no task containers + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + const result = await resultPromise + + expect(result.success).toBe(true) + expect(result.killedContainers).toHaveLength(0) + expect(result.errors).toHaveLength(0) + }) + + it("should only kill containers belonging to the specific run", async () => { + const runId = 555 + + mockExecFileSync + .mockReturnValueOnce("") // controller kill + .mockReturnValueOnce("evals-task-555-100.0\n") // docker ps + .mockReturnValueOnce("") // docker kill task + + const resultPromise = killRun(runId) + await vi.advanceTimersByTimeAsync(10000) + const result = await resultPromise + + expect(result.success).toBe(true) + // Verify execFileSync was called for docker ps with run-specific filter + expect(mockExecFileSync).toHaveBeenNthCalledWith( + 2, + "docker", + ["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-555-"], + expect.any(Object), + ) + }) +}) diff --git a/apps/web-evals/src/actions/runs.ts b/apps/web-evals/src/actions/runs.ts index 2eae1f6804a..9d213547cee 100644 --- a/apps/web-evals/src/actions/runs.ts +++ b/apps/web-evals/src/actions/runs.ts @@ -3,7 +3,7 @@ import * as path from "path" import fs from "fs" import { fileURLToPath } from "url" -import { spawn } from "child_process" +import { spawn, execFileSync } from "child_process" import { revalidatePath } from "next/cache" import pMap from "p-map" @@ -13,16 +13,22 @@ import { exerciseLanguages, createRun as _createRun, deleteRun as _deleteRun, + updateRun as _updateRun, + getIncompleteRuns as _getIncompleteRuns, + deleteRunsByIds as _deleteRunsByIds, createTask, getExercisesForLanguage, } from "@roo-code/evals" import { CreateRun } from "@/lib/schemas" +import { redisClient } from "@/lib/server/redis" + +// Storage base path for eval logs +const EVALS_STORAGE_PATH = "/tmp/evals/runs" const EVALS_REPO_PATH = 
path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals") -// eslint-disable-next-line @typescript-eslint/no-unused-vars -export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) { +export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) { const run = await _createRun({ ...values, timeout, @@ -37,15 +43,34 @@ export async function createRun({ suite, exercises = [], systemPrompt, timeout, throw new Error("Invalid exercise path: " + path) } - await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise }) + // Create multiple tasks for each iteration + for (let iteration = 1; iteration <= iterations; iteration++) { + await createTask({ + ...values, + runId: run.id, + language: language as ExerciseLanguage, + exercise, + iteration, + }) + } } } else { for (const language of exerciseLanguages) { - const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language) + const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language) - await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { - concurrency: 10, - }) + // Create tasks for all iterations of each exercise + const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = [] + for (const exercise of languageExercises) { + for (let iteration = 1; iteration <= iterations; iteration++) { + tasksToCreate.push({ language, exercise, iteration }) + } + } + + await pMap( + tasksToCreate, + ({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }), + { concurrency: 10 }, + ) } } @@ -98,3 +123,247 @@ export async function deleteRun(runId: number) { await _deleteRun(runId) revalidatePath("/runs") } + +export type KillRunResult = { + success: boolean + killedContainers: string[] + errors: string[] +} + +const sleep = (ms: number) => new 
Promise((resolve) => setTimeout(resolve, ms)) + +/** + * Kill all Docker containers associated with a run (controller and task runners). + * Kills the controller first, waits 10 seconds, then kills runners. + * Also clears Redis state for heartbeat and runners. + * + * Container naming conventions: + * - Controller: evals-controller-{runId} + * - Task runners: evals-task-{runId}-{taskId}.{attempt} + */ +export async function killRun(runId: number): Promise { + const killedContainers: string[] = [] + const errors: string[] = [] + const controllerPattern = `evals-controller-${runId}` + const taskPattern = `evals-task-${runId}-` + + try { + // Step 1: Kill the controller first + console.log(`Killing controller: ${controllerPattern}`) + try { + execFileSync("docker", ["kill", controllerPattern], { encoding: "utf-8", timeout: 10000 }) + killedContainers.push(controllerPattern) + console.log(`Killed controller container: ${controllerPattern}`) + } catch (_error) { + // Controller might not be running - that's ok, continue to kill runners + console.log(`Controller ${controllerPattern} not running or already stopped`) + } + + // Step 2: Wait 10 seconds before killing runners + console.log("Waiting 10 seconds before killing runners...") + await sleep(10000) + + // Step 3: Find and kill all task runner containers for THIS run only + let taskContainerNames: string[] = [] + + try { + const output = execFileSync("docker", ["ps", "--format", "{{.Names}}", "--filter", `name=${taskPattern}`], { + encoding: "utf-8", + timeout: 10000, + }) + taskContainerNames = output + .split("\n") + .map((name) => name.trim()) + .filter((name) => name.length > 0 && name.startsWith(taskPattern)) + } catch (error) { + console.error("Failed to list task containers:", error) + errors.push("Failed to list Docker task containers") + } + + // Kill each task runner container + for (const containerName of taskContainerNames) { + try { + execFileSync("docker", ["kill", containerName], { encoding: "utf-8", 
timeout: 10000 }) + killedContainers.push(containerName) + console.log(`Killed task container: ${containerName}`) + } catch (error) { + // Container might have already stopped + console.error(`Failed to kill container ${containerName}:`, error) + errors.push(`Failed to kill container: ${containerName}`) + } + } + + // Step 4: Clear Redis state + try { + const redis = await redisClient() + const heartbeatKey = `heartbeat:${runId}` + const runnersKey = `runners:${runId}` + + await redis.del(heartbeatKey) + await redis.del(runnersKey) + console.log(`Cleared Redis keys: ${heartbeatKey}, ${runnersKey}`) + } catch (error) { + console.error("Failed to clear Redis state:", error) + errors.push("Failed to clear Redis state") + } + } catch (error) { + console.error("Error in killRun:", error) + errors.push("Unexpected error while killing containers") + } + + revalidatePath(`/runs/${runId}`) + revalidatePath("/runs") + + return { + success: killedContainers.length > 0 || errors.length === 0, + killedContainers, + errors, + } +} + +export type DeleteIncompleteRunsResult = { + success: boolean + deletedCount: number + deletedRunIds: number[] + storageErrors: string[] +} + +/** + * Delete all incomplete runs (runs without a taskMetricsId/final score). + * Removes both database records and storage folders. 
+ */ +export async function deleteIncompleteRuns(): Promise { + const storageErrors: string[] = [] + + // Get all incomplete runs + const incompleteRuns = await _getIncompleteRuns() + const runIds = incompleteRuns.map((run) => run.id) + + if (runIds.length === 0) { + return { + success: true, + deletedCount: 0, + deletedRunIds: [], + storageErrors: [], + } + } + + // Delete storage folders for each run + for (const runId of runIds) { + const storagePath = path.join(EVALS_STORAGE_PATH, String(runId)) + try { + if (fs.existsSync(storagePath)) { + fs.rmSync(storagePath, { recursive: true, force: true }) + console.log(`Deleted storage folder: ${storagePath}`) + } + } catch (error) { + console.error(`Failed to delete storage folder ${storagePath}:`, error) + storageErrors.push(`Failed to delete storage for run ${runId}`) + } + + // Also try to clear Redis state for any potentially running incomplete runs + try { + const redis = await redisClient() + await redis.del(`heartbeat:${runId}`) + await redis.del(`runners:${runId}`) + } catch (error) { + // Non-critical error, just log it + console.error(`Failed to clear Redis state for run ${runId}:`, error) + } + } + + // Delete from database + await _deleteRunsByIds(runIds) + + revalidatePath("/runs") + + return { + success: true, + deletedCount: runIds.length, + deletedRunIds: runIds, + storageErrors, + } +} + +/** + * Get count of incomplete runs (for UI display) + */ +export async function getIncompleteRunsCount(): Promise { + const incompleteRuns = await _getIncompleteRuns() + return incompleteRuns.length +} + +/** + * Delete all runs older than 30 days. + * Removes both database records and storage folders. 
+ */ +export async function deleteOldRuns(): Promise { + const storageErrors: string[] = [] + + // Get all runs older than 30 days + const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000) + const { getRuns } = await import("@roo-code/evals") + const allRuns = await getRuns() + const oldRuns = allRuns.filter((run) => run.createdAt < thirtyDaysAgo) + const runIds = oldRuns.map((run) => run.id) + + if (runIds.length === 0) { + return { + success: true, + deletedCount: 0, + deletedRunIds: [], + storageErrors: [], + } + } + + // Delete storage folders for each run + for (const runId of runIds) { + const storagePath = path.join(EVALS_STORAGE_PATH, String(runId)) + try { + if (fs.existsSync(storagePath)) { + fs.rmSync(storagePath, { recursive: true, force: true }) + console.log(`Deleted storage folder: ${storagePath}`) + } + } catch (error) { + console.error(`Failed to delete storage folder ${storagePath}:`, error) + storageErrors.push(`Failed to delete storage for run ${runId}`) + } + + // Also try to clear Redis state + try { + const redis = await redisClient() + await redis.del(`heartbeat:${runId}`) + await redis.del(`runners:${runId}`) + } catch (error) { + // Non-critical error, just log it + console.error(`Failed to clear Redis state for run ${runId}:`, error) + } + } + + // Delete from database + await _deleteRunsByIds(runIds) + + revalidatePath("/runs") + + return { + success: true, + deletedCount: runIds.length, + deletedRunIds: runIds, + storageErrors, + } +} + +/** + * Update the description of a run. 
+ */ +export async function updateRunDescription(runId: number, description: string | null): Promise<{ success: boolean }> { + try { + await _updateRun(runId, { description }) + revalidatePath("/runs") + revalidatePath(`/runs/${runId}`) + return { success: true } + } catch (error) { + console.error("Failed to update run description:", error) + return { success: false } + } +} diff --git a/apps/web-evals/src/app/api/health/route.ts b/apps/web-evals/src/app/api/health/route.ts deleted file mode 100644 index ca8a833942f..00000000000 --- a/apps/web-evals/src/app/api/health/route.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { NextResponse } from "next/server" - -export async function GET() { - try { - return NextResponse.json( - { - status: "healthy", - timestamp: new Date().toISOString(), - uptime: process.uptime(), - environment: process.env.NODE_ENV || "production", - }, - { status: 200 }, - ) - } catch (error) { - return NextResponse.json( - { - status: "unhealthy", - timestamp: new Date().toISOString(), - error: error instanceof Error ? 
error.message : "Unknown error", - }, - { status: 503 }, - ) - } -} diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts new file mode 100644 index 00000000000..e5ec8751ab0 --- /dev/null +++ b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts @@ -0,0 +1,74 @@ +import { NextResponse } from "next/server" +import type { NextRequest } from "next/server" +import * as fs from "node:fs/promises" +import * as path from "node:path" + +import { findTask, findRun } from "@roo-code/evals" + +export const dynamic = "force-dynamic" + +const LOG_BASE_PATH = "/tmp/evals/runs" + +// Sanitize path components to prevent path traversal attacks +function sanitizePathComponent(component: string): string { + // Remove any path separators, null bytes, and other dangerous characters + return component.replace(/[/\\:\0*?"<>|]/g, "_") +} + +export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) { + const { id, taskId } = await params + + try { + const runId = Number(id) + const taskIdNum = Number(taskId) + + if (isNaN(runId) || isNaN(taskIdNum)) { + return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 }) + } + + // Verify the run exists + await findRun(runId) + + // Get the task to find its language and exercise + const task = await findTask(taskIdNum) + + // Verify the task belongs to this run + if (task.runId !== runId) { + return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 }) + } + + // Sanitize language and exercise to prevent path traversal + const safeLanguage = sanitizePathComponent(task.language) + const safeExercise = sanitizePathComponent(task.exercise) + + // Construct the log file path + const logFileName = `${safeLanguage}-${safeExercise}.log` + const logFilePath = path.join(LOG_BASE_PATH, String(runId), logFileName) + + // Verify the resolved path is within the expected 
directory (defense in depth) + const resolvedPath = path.resolve(logFilePath) + const expectedBase = path.resolve(LOG_BASE_PATH) + if (!resolvedPath.startsWith(expectedBase)) { + return NextResponse.json({ error: "Invalid log path" }, { status: 400 }) + } + + // Check if the log file exists and read it (async) + try { + const logContent = await fs.readFile(logFilePath, "utf-8") + return NextResponse.json({ logContent }) + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 }) + } + throw err + } + } catch (error) { + console.error("Error reading task log:", error) + + if (error instanceof Error && error.name === "RecordNotFoundError") { + return NextResponse.json({ error: "Task or run not found" }, { status: 404 }) + } + + return NextResponse.json({ error: "Failed to read log file" }, { status: 500 }) + } +} diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts new file mode 100644 index 00000000000..8b2760df987 --- /dev/null +++ b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts @@ -0,0 +1,147 @@ +import { NextResponse } from "next/server" +import type { NextRequest } from "next/server" +import * as fs from "node:fs" +import * as path from "node:path" +import archiver from "archiver" + +import { findRun, getTasks } from "@roo-code/evals" + +export const dynamic = "force-dynamic" + +const LOG_BASE_PATH = "/tmp/evals/runs" + +// Sanitize path components to prevent path traversal attacks +function sanitizePathComponent(component: string): string { + // Remove any path separators, null bytes, and other dangerous characters + return component.replace(/[/\\:\0*?"<>|]/g, "_") +} + +export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params + + try { + const runId = Number(id) + + if (isNaN(runId)) { + 
return NextResponse.json({ error: "Invalid run ID" }, { status: 400 }) + } + + // Verify the run exists + await findRun(runId) + + // Get all tasks for this run + const tasks = await getTasks(runId) + + // Filter for failed tasks only + const failedTasks = tasks.filter((task) => task.passed === false) + + if (failedTasks.length === 0) { + return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 }) + } + + // Create a zip archive + const archive = archiver("zip", { zlib: { level: 9 } }) + + // Collect chunks to build the response + const chunks: Buffer[] = [] + + archive.on("data", (chunk: Buffer) => { + chunks.push(chunk) + }) + + // Track archive errors + let archiveError: Error | null = null + archive.on("error", (err: Error) => { + archiveError = err + }) + + // Set up the end promise before finalizing (proper event listener ordering) + const archiveEndPromise = new Promise((resolve, reject) => { + archive.on("end", resolve) + archive.on("error", reject) + }) + + // Add each failed task's log file and history files to the archive + const logDir = path.join(LOG_BASE_PATH, String(runId)) + let filesAdded = 0 + + for (const task of failedTasks) { + // Sanitize language and exercise to prevent path traversal + const safeLanguage = sanitizePathComponent(task.language) + const safeExercise = sanitizePathComponent(task.exercise) + const expectedBase = path.resolve(LOG_BASE_PATH) + + // Add the log file + const logFileName = `${safeLanguage}-${safeExercise}.log` + const logFilePath = path.join(logDir, logFileName) + + // Verify the resolved path is within the expected directory (defense in depth) + const resolvedLogPath = path.resolve(logFilePath) + if (resolvedLogPath.startsWith(expectedBase) && fs.existsSync(logFilePath)) { + archive.file(logFilePath, { name: logFileName }) + filesAdded++ + } + + // Add the API conversation history file + // Format: {language}-{exercise}.{iteration}_api_conversation_history.json + const apiHistoryFileName = 
`${safeLanguage}-${safeExercise}.${task.iteration}_api_conversation_history.json` + const apiHistoryFilePath = path.join(logDir, apiHistoryFileName) + const resolvedApiHistoryPath = path.resolve(apiHistoryFilePath) + if (resolvedApiHistoryPath.startsWith(expectedBase) && fs.existsSync(apiHistoryFilePath)) { + archive.file(apiHistoryFilePath, { name: apiHistoryFileName }) + filesAdded++ + } + + // Add the UI messages file + // Format: {language}-{exercise}.{iteration}_ui_messages.json + const uiMessagesFileName = `${safeLanguage}-${safeExercise}.${task.iteration}_ui_messages.json` + const uiMessagesFilePath = path.join(logDir, uiMessagesFileName) + const resolvedUiMessagesPath = path.resolve(uiMessagesFilePath) + if (resolvedUiMessagesPath.startsWith(expectedBase) && fs.existsSync(uiMessagesFilePath)) { + archive.file(uiMessagesFilePath, { name: uiMessagesFileName }) + filesAdded++ + } + } + + // Check if any files were actually added + if (filesAdded === 0) { + archive.abort() + return NextResponse.json( + { error: "No log files found - they may have been cleared from disk" }, + { status: 404 }, + ) + } + + // Finalize the archive + await archive.finalize() + + // Wait for all data to be collected + await archiveEndPromise + + // Check for archive errors + if (archiveError) { + throw archiveError + } + + // Combine all chunks into a single buffer + const zipBuffer = Buffer.concat(chunks) + + // Return the zip file + return new NextResponse(zipBuffer, { + status: 200, + headers: { + "Content-Type": "application/zip", + "Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`, + "Content-Length": String(zipBuffer.length), + }, + }) + } catch (error) { + console.error("Error exporting failed logs:", error) + + if (error instanceof Error && error.name === "RecordNotFoundError") { + return NextResponse.json({ error: "Run not found" }, { status: 404 }) + } + + return NextResponse.json({ error: "Failed to export logs" }, { status: 500 }) + } +} diff 
--git a/apps/web-evals/src/app/runs/[id]/page.tsx b/apps/web-evals/src/app/runs/[id]/page.tsx index aae3fc70f9b..8b993eec8a0 100644 --- a/apps/web-evals/src/app/runs/[id]/page.tsx +++ b/apps/web-evals/src/app/runs/[id]/page.tsx @@ -7,7 +7,7 @@ export default async function Page({ params }: { params: Promise<{ id: string }> const run = await findRun(Number(id)) return ( -
+
) diff --git a/apps/web-evals/src/app/runs/[id]/run-status.tsx b/apps/web-evals/src/app/runs/[id]/run-status.tsx index 4b94ef14fab..e05b1b51ebe 100644 --- a/apps/web-evals/src/app/runs/[id]/run-status.tsx +++ b/apps/web-evals/src/app/runs/[id]/run-status.tsx @@ -1,55 +1,79 @@ "use client" +import { Link2, Link2Off, CheckCircle2 } from "lucide-react" import type { RunStatus as _RunStatus } from "@/hooks/use-run-status" import { cn } from "@/lib/utils" +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui" -export const RunStatus = ({ runStatus: { sseStatus, heartbeat, runners = [] } }: { runStatus: _RunStatus }) => ( -
-
-
-
Task Stream:
-
{sseStatus}
-
-
-
-
-
-
-
-
-
Task Controller:
-
{heartbeat ?? "dead"}
-
-
-
-
-
-
-
-
Task Runners:
- {runners.length > 0 &&
{runners?.join(", ")}
} -
-
-) +function StreamIcon({ status }: { status: "connected" | "waiting" | "error" }) { + if (status === "connected") { + return + } + return +} + +export const RunStatus = ({ + runStatus: { sseStatus, heartbeat, runners = [] }, + isComplete = false, +}: { + runStatus: _RunStatus + isComplete?: boolean +}) => { + // For completed runs, show a simple "Complete" badge + if (isComplete) { + return ( + + +
+ +
+
+ + Run complete + +
+ ) + } + + return ( + + +
+ {/* Task Stream status icon */} + + + {/* Task Controller ID */} + {heartbeat ?? "-"} + + {/* Task Runners count */} + 0 ? "text-green-500" : "text-rose-500"}> + {runners.length > 0 ? `${runners.length}r` : "0r"} + +
+
+ +
+
+ + Task Stream: {sseStatus} +
+
+ + Task Controller: {heartbeat ?? "dead"} +
+
+ 0 ? "text-green-500" : "text-rose-500"}>● + Task Runners: {runners.length > 0 ? runners.length : "none"} +
+ {runners.length > 0 && ( +
+ {runners.map((runner) => ( +
{runner}
+ ))} +
+ )} +
+
+
+ ) +} diff --git a/apps/web-evals/src/app/runs/[id]/run.tsx b/apps/web-evals/src/app/runs/[id]/run.tsx index b6c5290b135..badd77741e0 100644 --- a/apps/web-evals/src/app/runs/[id]/run.tsx +++ b/apps/web-evals/src/app/runs/[id]/run.tsx @@ -1,112 +1,1058 @@ "use client" -import { useMemo } from "react" -import { LoaderCircle } from "lucide-react" +import { useMemo, useState, useCallback, useEffect, Fragment } from "react" +import { toast } from "sonner" +import { LoaderCircle, FileText, Copy, Check, StopCircle, List, Layers } from "lucide-react" -import type { Run, TaskMetrics as _TaskMetrics } from "@roo-code/evals" +import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals" +import type { ToolName } from "@roo-code/types" -import { formatCurrency, formatDuration, formatTokens } from "@/lib/formatters" +import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters" import { useRunStatus } from "@/hooks/use-run-status" -import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" +import { killRun } from "@/actions/runs" +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, + Tooltip, + TooltipContent, + TooltipTrigger, + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + ScrollArea, + Button, + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from "@/components/ui" import { TaskStatus } from "./task-status" import { RunStatus } from "./run-status" type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost"> +// Extended Task type with taskMetrics from useRunStatus +type TaskWithMetrics = Task & { taskMetrics: _TaskMetrics | null } + +type ToolUsageEntry = { attempts: number; failures: number } +type ToolUsage = Record + +// Generate abbreviation from tool name (e.g., 
"read_file" -> "RF", "list_code_definition_names" -> "LCDN") +function getToolAbbreviation(toolName: string): string { + return toolName + .split("_") + .map((word) => word[0]?.toUpperCase() ?? "") + .join("") +} + +// Pattern definitions for syntax highlighting +type HighlightPattern = { + pattern: RegExp + className: string + // If true, wraps the entire match; if a number, wraps that capture group + wrapGroup?: number +} + +const HIGHLIGHT_PATTERNS: HighlightPattern[] = [ + // Log levels - styled as badges + { pattern: /\|\s*(INFO)\s*\|/g, className: "text-green-400", wrapGroup: 1 }, + { pattern: /\|\s*(WARN|WARNING)\s*\|/g, className: "text-yellow-400", wrapGroup: 1 }, + { pattern: /\|\s*(ERROR)\s*\|/g, className: "text-red-400 font-semibold", wrapGroup: 1 }, + { pattern: /\|\s*(DEBUG)\s*\|/g, className: "text-gray-400", wrapGroup: 1 }, + // Task identifiers - important events + { + pattern: /(taskCreated|taskFocused|taskStarted|taskCompleted|taskAborted|taskResumable)/g, + className: "text-purple-400 font-medium", + }, + // Tool failures - highlight in red + { pattern: /(taskToolFailed)/g, className: "text-red-400 font-bold" }, + { pattern: /(Tool execution failed|tool.*failed|failed.*tool)/gi, className: "text-red-400" }, + { pattern: /(EvalPass)/g, className: "text-green-400 font-bold" }, + { pattern: /(EvalFail)/g, className: "text-red-400 font-bold" }, + // Message arrows + { pattern: /→/g, className: "text-cyan-400" }, + // Tool names in quotes + { pattern: /"(tool)":\s*"([^"]+)"/g, className: "text-orange-400" }, + // JSON keys + { pattern: /"([^"]+)":/g, className: "text-sky-300" }, + // Boolean values + { pattern: /:\s*(true|false)/g, className: "text-amber-400", wrapGroup: 1 }, + // Numbers + { pattern: /:\s*(-?\d+\.?\d*)/g, className: "text-emerald-400", wrapGroup: 1 }, +] + +// Extract timestamp from a log line and return elapsed time from baseline +function formatElapsedTime(timestamp: string, baselineMs: number): string { + const currentMs = new 
Date(timestamp).getTime() + const elapsedMs = currentMs - baselineMs + const totalSeconds = Math.floor(elapsedMs / 1000) + const minutes = Math.floor(totalSeconds / 60) + const seconds = totalSeconds % 60 + return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}` +} + +// Extract the first timestamp from the log to use as baseline +function extractFirstTimestamp(log: string): number | null { + // Match timestamp at start of line: [2025-11-28T09:35:23.187Z | ... or [2025-11-28T09:35:23.187Z] + const match = log.match(/\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)[\s|\]]/) + const isoString = match?.[1] + if (!isoString) return null + return new Date(isoString).getTime() +} + +// Simplify log line by removing redundant metadata +function simplifyLogLine(line: string, baselineMs: number | null): { timestamp: string; simplified: string } { + // Extract timestamp - matches [2025-11-28T09:35:23.187Z | ... format + const timestampMatch = line.match(/\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)[\s|\]]/) + const isoTimestamp = timestampMatch?.[1] + if (!isoTimestamp) { + return { timestamp: "", simplified: line } + } + + const timestamp = baselineMs !== null ? 
formatElapsedTime(isoTimestamp, baselineMs) : isoTimestamp.slice(11, 19) + + // Remove the timestamp from the line (handles both [timestamp] and [timestamp | formats) + let simplified = line.replace(/\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\s*\|?\s*/, "") + + // Remove redundant metadata: pid, run, task IDs (they're same for entire log) + simplified = simplified.replace(/\|\s*pid:\d+\s*/g, "") + simplified = simplified.replace(/\|\s*run:\d+\s*/g, "") + simplified = simplified.replace(/\|\s*task:\d+\s*/g, "") + simplified = simplified.replace(/runTask\s*\|\s*/g, "") + + // Clean up extra pipes, spaces, and trailing brackets + simplified = simplified.replace(/\|\s*\|/g, "|") + simplified = simplified.replace(/^\s*\|\s*/, "") + simplified = simplified.replace(/\]\s*$/, "") // Remove trailing bracket if present + + return { timestamp, simplified } +} + +// Format a single line with syntax highlighting using React elements (XSS-safe) +function formatLine(line: string): React.ReactNode[] { + // Find all matches with their positions + type Match = { start: number; end: number; text: string; className: string } + const matches: Match[] = [] + + for (const { pattern, className, wrapGroup } of HIGHLIGHT_PATTERNS) { + // Reset regex state + pattern.lastIndex = 0 + let regexMatch + while ((regexMatch = pattern.exec(line)) !== null) { + const capturedText = wrapGroup !== undefined ? regexMatch[wrapGroup] : regexMatch[0] + // Skip if capture group didn't match + if (!capturedText) continue + const start = + wrapGroup !== undefined ? 
regexMatch.index + regexMatch[0].indexOf(capturedText) : regexMatch.index + matches.push({ + start, + end: start + capturedText.length, + text: capturedText, + className, + }) + } + } + + // Sort matches by position and filter overlapping ones + matches.sort((a, b) => a.start - b.start) + const filteredMatches: Match[] = [] + for (const m of matches) { + const lastMatch = filteredMatches[filteredMatches.length - 1] + if (!lastMatch || m.start >= lastMatch.end) { + filteredMatches.push(m) + } + } + + // Build result with highlighted spans + const result: React.ReactNode[] = [] + let currentPos = 0 + + for (const [i, m] of filteredMatches.entries()) { + // Add text before this match + if (m.start > currentPos) { + result.push(line.slice(currentPos, m.start)) + } + // Add highlighted match + result.push( + + {m.text} + , + ) + currentPos = m.end + } + + // Add remaining text + if (currentPos < line.length) { + result.push(line.slice(currentPos)) + } + + return result.length > 0 ? result : [line] +} + +// Determine the visual style for a log line based on its content +function getLineStyle(line: string): string { + if (line.includes("ERROR")) return "bg-red-950/30 border-l-2 border-red-500" + if (line.includes("WARN") || line.includes("WARNING")) return "bg-yellow-950/20 border-l-2 border-yellow-500" + if (line.includes("taskToolFailed")) return "bg-red-950/30 border-l-2 border-red-500" + if (line.includes("taskStarted") || line.includes("taskCreated")) return "bg-purple-950/20" + if (line.includes("EvalPass")) return "bg-green-950/30 border-l-2 border-green-500" + if (line.includes("EvalFail")) return "bg-red-950/30 border-l-2 border-red-500" + if (line.includes("taskCompleted") || line.includes("taskAborted")) return "bg-blue-950/20" + return "" +} + +// Format log content with basic highlighting (XSS-safe - no dangerouslySetInnerHTML) +function formatLogContent(log: string): React.ReactNode[] { + const lines = log.split("\n") + const baselineMs = 
extractFirstTimestamp(log) + + return lines.map((line, index) => { + if (!line.trim()) { + return ( +
+ {" "} +
+ ) + } + + const parsed = simplifyLogLine(line, baselineMs) + const lineStyle = getLineStyle(line) + + return ( +
+ {/* Elapsed time */} + + {parsed.timestamp} + + {/* Log content - pl-12 ensures wrapped lines are indented under the timestamp */} + + {formatLine(parsed.simplified)} + +
+ ) + }) +} + export function Run({ run }: { run: Run }) { const runStatus = useRunStatus(run) - const { tasks, tokenUsage, usageUpdatedAt } = runStatus + const { tasks, tokenUsage, toolUsage, usageUpdatedAt, heartbeat, runners } = runStatus + + const [selectedTask, setSelectedTask] = useState(null) + const [taskLog, setTaskLog] = useState(null) + const [isLoadingLog, setIsLoadingLog] = useState(false) + const [copied, setCopied] = useState(false) + const [showKillDialog, setShowKillDialog] = useState(false) + const [isKilling, setIsKilling] = useState(false) + const [groupByStatus, setGroupByStatus] = useState(() => { + // Initialize from localStorage if available (client-side only) + if (typeof window !== "undefined") { + const stored = localStorage.getItem("evals-group-by-status") + return stored === "true" + } + return false + }) + + // Persist groupByStatus to localStorage + useEffect(() => { + localStorage.setItem("evals-group-by-status", String(groupByStatus)) + }, [groupByStatus]) + + // Determine if run is still active (has heartbeat or runners) + const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0)) + + const onKillRun = useCallback(async () => { + setIsKilling(true) + try { + const result = await killRun(run.id) + if (result.killedContainers.length > 0) { + toast.success(`Killed ${result.killedContainers.length} container(s)`) + } else if (result.errors.length === 0) { + toast.info("No running containers found") + } else { + toast.error(result.errors.join(", ")) + } + } catch (error) { + console.error("Failed to kill run:", error) + toast.error("Failed to kill run") + } finally { + setIsKilling(false) + setShowKillDialog(false) + } + }, [run.id]) + + const onCopyLog = useCallback(async () => { + if (!taskLog) return + + try { + await navigator.clipboard.writeText(taskLog) + setCopied(true) + toast.success("Log copied to clipboard") + setTimeout(() => setCopied(false), 2000) + } catch (error) { + console.error("Failed 
to copy log:", error) + toast.error("Failed to copy log") + } + }, [taskLog]) + + // Handle ESC key to close the dialog + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Escape" && selectedTask) { + setSelectedTask(null) + } + } + + document.addEventListener("keydown", handleKeyDown) + return () => document.removeEventListener("keydown", handleKeyDown) + }, [selectedTask]) const taskMetrics: Record = useMemo(() => { + // Reference usageUpdatedAt to trigger recomputation when Map contents change + void usageUpdatedAt const metrics: Record = {} + // Helper to calculate duration from database timestamps when streaming duration + // is unavailable (e.g., page was loaded after TaskStarted event was published) + const calculateDurationFromTimestamps = (task: TaskWithMetrics): number => { + if (!task.startedAt) return 0 + const startTime = new Date(task.startedAt).getTime() + const endTime = task.finishedAt ? new Date(task.finishedAt).getTime() : Date.now() + return endTime - startTime + } + tasks?.forEach((task) => { - const usage = tokenUsage.get(task.id) + const streamingUsage = tokenUsage.get(task.id) + const dbMetrics = task.taskMetrics - if (task.finishedAt && task.taskMetrics) { - metrics[task.id] = task.taskMetrics - } else if (usage) { + // For finished tasks, prefer DB values but fall back to streaming values + // This handles race conditions during timeout where DB might not have latest data + if (task.finishedAt) { + // Check if DB metrics have meaningful values (not just default/empty) + const dbHasData = dbMetrics && (dbMetrics.tokensIn > 0 || dbMetrics.tokensOut > 0 || dbMetrics.cost > 0) + if (dbHasData) { + // If DB duration is 0 but we have timestamps, calculate from timestamps + const duration = dbMetrics.duration || calculateDurationFromTimestamps(task) + metrics[task.id] = { ...dbMetrics, duration } + } else if (streamingUsage) { + // Fall back to streaming values if DB is empty/stale + // Use streaming duration, or 
calculate from timestamps if not available + const duration = streamingUsage.duration || calculateDurationFromTimestamps(task) + metrics[task.id] = { + tokensIn: streamingUsage.totalTokensIn, + tokensOut: streamingUsage.totalTokensOut, + tokensContext: streamingUsage.contextTokens, + duration, + cost: streamingUsage.totalCost, + } + } else { + // Task finished but no DB metrics and no streaming data + // (e.g., page loaded after task completed, metrics not persisted) + // Still provide duration calculated from timestamps + metrics[task.id] = { + tokensIn: 0, + tokensOut: 0, + tokensContext: 0, + duration: calculateDurationFromTimestamps(task), + cost: 0, + } + } + } else if (streamingUsage) { + // For running tasks, use streaming values + // Use streaming duration, or calculate from task.startedAt if not available + // (happens when page loads after TaskStarted event was already published) + const duration = streamingUsage.duration || calculateDurationFromTimestamps(task) + metrics[task.id] = { + tokensIn: streamingUsage.totalTokensIn, + tokensOut: streamingUsage.totalTokensOut, + tokensContext: streamingUsage.contextTokens, + duration, + cost: streamingUsage.totalCost, + } + } else if (task.startedAt) { + // Task has started (has startedAt in DB) but no streaming data yet + // This can happen when page loads after TaskStarted but before TokenUsageUpdated metrics[task.id] = { - tokensIn: usage.totalTokensIn, - tokensOut: usage.totalTokensOut, - tokensContext: usage.contextTokens, - duration: usage.duration ?? 0, - cost: usage.totalCost, + tokensIn: 0, + tokensOut: 0, + tokensContext: 0, + duration: calculateDurationFromTimestamps(task), + cost: 0, } } }) return metrics - // eslint-disable-next-line react-hooks/exhaustive-deps }, [tasks, tokenUsage, usageUpdatedAt]) + const onViewTaskLog = useCallback( + async (task: Task) => { + // Only allow viewing logs for tasks that have started. 
+ // Note: we treat presence of derived metrics as evidence of a started task, + // since this page may be rendered without streaming `tokenUsage` populated. + const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] + if (!hasStarted) { + toast.error("Task has not started yet") + return + } + + setSelectedTask(task) + setIsLoadingLog(true) + setTaskLog(null) + + try { + const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`) + + if (!response.ok) { + const error = await response.json() + toast.error(error.error || "Failed to load log") + setSelectedTask(null) + return + } + + const data = await response.json() + setTaskLog(data.logContent) + } catch (error) { + console.error("Error loading task log:", error) + toast.error("Failed to load log") + setSelectedTask(null) + } finally { + setIsLoadingLog(false) + } + }, + [run.id, tokenUsage, taskMetrics], + ) + + // Collect all unique tool names from all tasks and sort by total attempts + const toolColumns = useMemo(() => { + // Reference usageUpdatedAt to trigger recomputation when Map contents change + void usageUpdatedAt + if (!tasks) return [] + + const toolTotals = new Map() + + for (const task of tasks) { + // Get both DB and streaming values + const dbToolUsage = task.taskMetrics?.toolUsage + const streamingToolUsage = toolUsage.get(task.id) + + // For finished tasks, prefer DB values but fall back to streaming values + // For running tasks, use streaming values + // This handles race conditions during timeout where DB might not have latest data + const taskToolUsage = task.finishedAt + ? dbToolUsage && Object.keys(dbToolUsage).length > 0 + ? dbToolUsage + : streamingToolUsage + : streamingToolUsage + + if (taskToolUsage) { + for (const [toolName, usage] of Object.entries(taskToolUsage)) { + const tool = toolName as ToolName + const current = toolTotals.get(tool) ?? 
0 + toolTotals.set(tool, current + usage.attempts) + } + } + } + + // Sort by total attempts descending + return Array.from(toolTotals.entries()) + .sort((a, b) => b[1] - a[1]) + .map(([name]): ToolName => name) + // toolUsage ref is stable; usageUpdatedAt triggers recomputation when Map contents change + }, [tasks, toolUsage, usageUpdatedAt]) + + // Compute aggregate stats + const stats = useMemo(() => { + // Reference usageUpdatedAt to trigger recomputation when Map contents change + void usageUpdatedAt + if (!tasks) return null + + const passed = tasks.filter((t) => t.passed === true).length + const failed = tasks.filter((t) => t.passed === false).length + const completed = passed + failed + + let totalTokensIn = 0 + let totalTokensOut = 0 + let totalCost = 0 + let totalDuration = 0 + + // Aggregate tool usage from all tasks (both finished and running) + const toolUsageAggregate: ToolUsage = {} + + for (const task of tasks) { + const metrics = taskMetrics[task.id] + if (metrics) { + totalTokensIn += metrics.tokensIn + totalTokensOut += metrics.tokensOut + totalCost += metrics.cost + totalDuration += metrics.duration + } + + // Aggregate tool usage: prefer DB values for finished tasks, fall back to streaming values + // This handles race conditions during timeout where DB might not have latest data + const dbToolUsage = task.taskMetrics?.toolUsage + const streamingToolUsage = toolUsage.get(task.id) + const taskToolUsage = task.finishedAt + ? dbToolUsage && Object.keys(dbToolUsage).length > 0 + ? 
dbToolUsage + : streamingToolUsage + : streamingToolUsage + + if (taskToolUsage) { + for (const [key, usage] of Object.entries(taskToolUsage)) { + const tool = key as keyof ToolUsage + if (!toolUsageAggregate[tool]) { + toolUsageAggregate[tool] = { attempts: 0, failures: 0 } + } + toolUsageAggregate[tool].attempts += usage.attempts + toolUsageAggregate[tool].failures += usage.failures + } + } + } + + const remaining = tasks.length - completed + + return { + passed, + failed, + completed, + remaining, + passRate: completed > 0 ? ((passed / completed) * 100).toFixed(1) : null, + totalTokensIn, + totalTokensOut, + totalCost, + totalDuration, + toolUsage: toolUsageAggregate, + } + // Map refs are stable; usageUpdatedAt triggers recomputation when Map contents change + }, [tasks, taskMetrics, toolUsage, usageUpdatedAt]) + + // Calculate elapsed time (wall-clock time from run creation to completion or now) + const elapsedTime = useMemo(() => { + // Reference usageUpdatedAt to trigger recomputation for live elapsed time updates + void usageUpdatedAt + if (!tasks || tasks.length === 0) return null + + const startTime = new Date(run.createdAt).getTime() + + // If run is complete, find the latest finishedAt from tasks + if (run.taskMetricsId) { + const latestFinish = tasks.reduce((latest, task) => { + if (task.finishedAt) { + const finishTime = new Date(task.finishedAt).getTime() + return finishTime > latest ? 
finishTime : latest + } + return latest + }, startTime) + return latestFinish - startTime + } + + // If still running, use current time + return Date.now() - startTime + }, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt]) + + // Task status categories + type TaskStatusCategory = "failed" | "in_progress" | "passed" | "not_started" + + const getTaskStatusCategory = useCallback( + (task: TaskWithMetrics): TaskStatusCategory => { + if (task.passed === false) return "failed" + if (task.passed === true) return "passed" + // Check streaming data, DB metrics, or startedAt timestamp + const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] + if (hasStarted) return "in_progress" + return "not_started" + }, + [tokenUsage, taskMetrics], + ) + + // Group tasks by status while preserving original index + const groupedTasks = useMemo(() => { + if (!tasks || !groupByStatus) return null + + const groups: Record> = { + failed: [], + in_progress: [], + passed: [], + not_started: [], + } + + tasks.forEach((task, index) => { + const status = getTaskStatusCategory(task) + groups[status].push({ task, originalIndex: index }) + }) + + return groups + }, [tasks, groupByStatus, getTaskStatusCategory]) + + const statusLabels = useMemo( + (): Record => ({ + failed: { label: "Failed", className: "text-red-500", count: groupedTasks?.failed.length ?? 0 }, + in_progress: { + label: "In Progress", + className: "text-yellow-500", + count: groupedTasks?.in_progress.length ?? 0, + }, + passed: { label: "Passed", className: "text-green-500", count: groupedTasks?.passed.length ?? 0 }, + not_started: { + label: "Not Started", + className: "text-muted-foreground", + count: groupedTasks?.not_started.length ?? 
0, + }, + }), + [groupedTasks], + ) + + const statusOrder: TaskStatusCategory[] = ["failed", "in_progress", "passed", "not_started"] + + // Helper to render a task row + const renderTaskRow = (task: TaskWithMetrics, originalIndex: number) => { + const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] + return ( + hasStarted && onViewTaskLog(task)}> + + {originalIndex + 1} + + +
+ +
+ + {task.language}/{task.exercise} + {task.iteration > 1 && ( + (#{task.iteration}) + )} + + {hasStarted && ( + + + + + Click to view log + + )} +
+
+
+ {taskMetrics[task.id] ? ( + <> + +
+
{formatTokens(taskMetrics[task.id]!.tokensIn)}
/ +
{formatTokens(taskMetrics[task.id]!.tokensOut)}
+
+
+ + {formatTokens(taskMetrics[task.id]!.tokensContext)} + + {toolColumns.map((toolName) => { + const dbUsage = task.taskMetrics?.toolUsage?.[toolName] + const streamingUsage = toolUsage.get(task.id)?.[toolName] + const usage = task.finishedAt ? (dbUsage ?? streamingUsage) : streamingUsage + + const successRate = + usage && usage.attempts > 0 + ? ((usage.attempts - usage.failures) / usage.attempts) * 100 + : 100 + const rateColor = + successRate === 100 + ? "text-muted-foreground" + : successRate >= 80 + ? "text-yellow-500" + : "text-red-500" + return ( + + {usage ? ( +
+ {usage.attempts} + {formatToolUsageSuccessRate(usage)} +
+ ) : ( + - + )} +
+ ) + })} + + {taskMetrics[task.id]!.duration ? formatDuration(taskMetrics[task.id]!.duration) : "-"} + + + {formatCurrency(taskMetrics[task.id]!.cost)} + + + ) : ( + + )} +
+ ) + } + return ( <>
-
-
-
{run.model}
- {run.description &&
{run.description}
} -
- {!run.taskMetricsId && } -
{!tasks ? ( ) : ( - - - - Exercise - Tokens In / Out - Context - Duration - Cost - - - - {tasks.map((task) => ( - - -
- -
- {task.language}/{task.exercise} + <> + {/* View Toggle */} +
+ + + + + + {groupByStatus ? "Show tasks in run order" : "Group tasks by status"} + + +
+
+ + {stats && ( + + + {/* Provider, Model title and status */} +
+ {run.settings?.apiProvider && ( + + {run.settings.apiProvider} + + )} +
{run.model}
+ + {run.description && ( + + - {run.description} + + )} + {isRunActive && ( + + + + + + Stop all containers for this run + + + )}
- - - {taskMetrics[task.id] ? ( - <> - -
-
{formatTokens(taskMetrics[task.id]!.tokensIn)}
/ -
{formatTokens(taskMetrics[task.id]!.tokensOut)}
+ {/* Main Stats Row */} +
+ {/* Pass Rate / Fail Rate / Remaining % */} +
+
+ + {stats.completed > 0 + ? `${((stats.passed / stats.completed) * 100).toFixed(1)}%` + : "-"} + + / + + {stats.completed > 0 + ? `${((stats.failed / stats.completed) * 100).toFixed(1)}%` + : "-"} + + / + + {tasks.length > 0 + ? `${((stats.remaining / tasks.length) * 100).toFixed(1)}%` + : "-"} + +
+
+ {stats.passed} + {" / "} + {stats.failed} + {" / "} + {stats.remaining} + {" of "} + {tasks.length} +
- - - {formatTokens(taskMetrics[task.id]!.tokensContext)} - - - {taskMetrics[task.id]!.duration - ? formatDuration(taskMetrics[task.id]!.duration) - : "-"} - - - {formatCurrency(taskMetrics[task.id]!.cost)} - - - ) : ( - - )} + + {/* Tokens */} +
+
+ {formatTokens(stats.totalTokensIn)} + / + {formatTokens(stats.totalTokensOut)} +
+
Tokens In / Out
+
+ + {/* Cost */} +
+
+ {formatCurrency(stats.totalCost)} +
+
Cost
+
+ + {/* Duration */} +
+
+ {stats.totalDuration > 0 + ? formatDuration(stats.totalDuration) + : "-"} +
+
Duration
+
+ + {/* Elapsed Time */} +
+
+ {elapsedTime !== null ? formatDuration(elapsedTime) : "-"} +
+
Elapsed
+
+ + {/* Estimated Time Remaining - only show if run is active and we have data */} + {!run.taskMetricsId && + elapsedTime !== null && + stats.completed > 0 && + stats.remaining > 0 && ( +
+
+ ~ + {formatDuration( + (elapsedTime / stats.completed) * stats.remaining, + )} +
+
+ Est. Remaining +
+
+ )} +
+ + {/* Tool Usage Row */} + {Object.keys(stats.toolUsage).length > 0 && ( +
+ {Object.entries(stats.toolUsage) + .sort(([, a], [, b]) => b.attempts - a.attempts) + .map(([toolName, usage]) => { + const abbr = getToolAbbreviation(toolName) + const successRate = + usage.attempts > 0 + ? ((usage.attempts - usage.failures) / + usage.attempts) * + 100 + : 100 + const rateColor = + successRate === 100 + ? "text-green-500" + : successRate >= 80 + ? "text-yellow-500" + : "text-red-500" + return ( + + +
+ + {abbr} + + + {usage.attempts} + + + {formatToolUsageSuccessRate(usage)} + +
+
+ + {toolName} + +
+ ) + })} +
+ )} + + + )} + + # + Exercise + Tokens In / Out + Context + {toolColumns.map((toolName) => ( + + + {getToolAbbreviation(toolName)} + {toolName} + + + ))} + Duration + Cost - ))} - -
+ + + {groupByStatus && groupedTasks + ? // Grouped view + statusOrder.map((status) => { + const group = groupedTasks[status] + if (group.length === 0) return null + const { label, className } = statusLabels[status] + return ( + + + + + {label} ({group.length}) + + + + {group.map(({ task, originalIndex }) => + renderTaskRow(task, originalIndex), + )} + + ) + }) + : // Default order view + tasks.map((task, index) => renderTaskRow(task, index))} + + + )}
+ + {/* Task Log Dialog - Full Screen */} + setSelectedTask(null)}> + + +
+ + + {selectedTask?.language}/{selectedTask?.exercise} + {selectedTask?.iteration && selectedTask.iteration > 1 && ( + (#{selectedTask.iteration}) + )} + + ( + {selectedTask?.passed === true + ? "Passed" + : selectedTask?.passed === false + ? "Failed" + : "Running"} + ) + + + {taskLog && ( + + )} +
+
+
+ {isLoadingLog ? ( +
+ +
+ ) : taskLog ? ( + +
+ {formatLogContent(taskLog)} +
+
+ ) : ( +
+ Log file not available (may have been cleared) +
+ )} +
+
+
+ + {/* Kill Run Confirmation Dialog */} + + + + Kill Run? + + This will stop the controller and all task runner containers for this run. Any running tasks + will be terminated immediately. This action cannot be undone. + + + + Cancel + + {isKilling ? ( + <> + + Killing... + + ) : ( + "Kill Run" + )} + + + + ) } diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index f8633611b61..be015ac8ca3 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -1,39 +1,53 @@ "use client" -import { useCallback, useRef, useState } from "react" +import { useCallback, useEffect, useMemo, useState } from "react" import { useRouter } from "next/navigation" import { z } from "zod" import { useQuery } from "@tanstack/react-query" import { useForm, FormProvider } from "react-hook-form" import { zodResolver } from "@hookform/resolvers/zod" -import fuzzysort from "fuzzysort" import { toast } from "sonner" -import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Book, CircleCheck } from "lucide-react" +import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Info, Plus, Minus } from "lucide-react" -import { globalSettingsSchema, providerSettingsSchema, EVALS_SETTINGS, getModelId } from "@roo-code/types" +import { + globalSettingsSchema, + providerSettingsSchema, + EVALS_SETTINGS, + getModelId, + type ProviderSettings, + type GlobalSettings, +} from "@roo-code/types" import { createRun } from "@/actions/runs" import { getExercises } from "@/actions/exercises" + import { - createRunSchema, type CreateRun, - MODEL_DEFAULT, + createRunSchema, CONCURRENCY_MIN, CONCURRENCY_MAX, CONCURRENCY_DEFAULT, TIMEOUT_MIN, TIMEOUT_MAX, TIMEOUT_DEFAULT, + ITERATIONS_MIN, + ITERATIONS_MAX, + ITERATIONS_DEFAULT, } from "@/lib/schemas" import { cn } from "@/lib/utils" + import { useOpenRouterModels } from "@/hooks/use-open-router-models" +import { useRooCodeCloudModels } from 
"@/hooks/use-roo-code-cloud-models" + import { Button, + Checkbox, FormControl, FormField, FormItem, FormLabel, FormMessage, + Input, Textarea, Tabs, TabsList, @@ -48,39 +62,78 @@ import { Popover, PopoverContent, PopoverTrigger, - ScrollArea, Slider, - Dialog, - DialogContent, - DialogTitle, - DialogFooter, + Label, + Tooltip, + TooltipContent, + TooltipTrigger, } from "@/components/ui" import { SettingsDiff } from "./settings-diff" +type ImportedSettings = { + apiConfigs: Record + globalSettings: GlobalSettings + currentApiConfigName: string +} + +// Type for a model selection entry +type ModelSelection = { + id: string + model: string + popoverOpen: boolean +} + +// Type for a config selection entry (for import mode) +type ConfigSelection = { + id: string + configName: string + popoverOpen: boolean +} + export function NewRun() { const router = useRouter() - const [mode, setMode] = useState<"openrouter" | "settings">("openrouter") - const [modelSearchValue, setModelSearchValue] = useState("") - const [modelPopoverOpen, setModelPopoverOpen] = useState(false) - - const modelSearchResultsRef = useRef>(new Map()) - const modelSearchValueRef = useRef("") + const [provider, setModelSource] = useState<"roo" | "openrouter" | "other">("other") + const [useNativeToolProtocol, setUseNativeToolProtocol] = useState(true) + const [commandExecutionTimeout, setCommandExecutionTimeout] = useState(20) + const [terminalShellIntegrationTimeout, setTerminalShellIntegrationTimeout] = useState(30) // seconds + + // State for multiple model selections + const [modelSelections, setModelSelections] = useState([ + { id: crypto.randomUUID(), model: "", popoverOpen: false }, + ]) + + // State for imported settings with multiple config selections + const [importedSettings, setImportedSettings] = useState(null) + const [configSelections, setConfigSelections] = useState([ + { id: crypto.randomUUID(), configName: "", popoverOpen: false }, + ]) + + const openRouter = useOpenRouterModels() + 
const rooCodeCloud = useRooCodeCloudModels() + const models = provider === "openrouter" ? openRouter.data : rooCodeCloud.data + const searchValue = provider === "openrouter" ? openRouter.searchValue : rooCodeCloud.searchValue + const setSearchValue = provider === "openrouter" ? openRouter.setSearchValue : rooCodeCloud.setSearchValue + const onFilter = provider === "openrouter" ? openRouter.onFilter : rooCodeCloud.onFilter - const models = useOpenRouterModels() const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() }) + // State for selected exercises (needed for language toggle buttons) + const [selectedExercises, setSelectedExercises] = useState([]) + const form = useForm({ resolver: zodResolver(createRunSchema), defaultValues: { - model: MODEL_DEFAULT, + model: "", description: "", suite: "full", exercises: [], settings: undefined, concurrency: CONCURRENCY_DEFAULT, timeout: TIMEOUT_DEFAULT, + iterations: ITERATIONS_DEFAULT, + jobToken: "", }, }) @@ -91,57 +144,283 @@ export function NewRun() { formState: { isSubmitting }, } = form - const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"]) - - const [systemPromptDialogOpen, setSystemPromptDialogOpen] = useState(false) - const [systemPrompt, setSystemPrompt] = useState("") - const systemPromptRef = useRef(null) + const [suite, settings] = watch(["suite", "settings", "concurrency"]) - const onSubmit = useCallback( - async (values: CreateRun) => { - try { - if (mode === "openrouter") { - values.settings = { ...(values.settings || {}), openRouterModelId: model } + // Load settings from localStorage on mount + useEffect(() => { + const savedConcurrency = localStorage.getItem("evals-concurrency") + if (savedConcurrency) { + const parsed = parseInt(savedConcurrency, 10) + if (!isNaN(parsed) && parsed >= CONCURRENCY_MIN && parsed <= CONCURRENCY_MAX) { + setValue("concurrency", parsed) + } + } + const savedTimeout = localStorage.getItem("evals-timeout") + if 
(savedTimeout) { + const parsed = parseInt(savedTimeout, 10) + if (!isNaN(parsed) && parsed >= TIMEOUT_MIN && parsed <= TIMEOUT_MAX) { + setValue("timeout", parsed) + } + } + const savedCommandTimeout = localStorage.getItem("evals-command-execution-timeout") + if (savedCommandTimeout) { + const parsed = parseInt(savedCommandTimeout, 10) + if (!isNaN(parsed) && parsed >= 20 && parsed <= 60) { + setCommandExecutionTimeout(parsed) + } + } + const savedShellTimeout = localStorage.getItem("evals-shell-integration-timeout") + if (savedShellTimeout) { + const parsed = parseInt(savedShellTimeout, 10) + if (!isNaN(parsed) && parsed >= 30 && parsed <= 60) { + setTerminalShellIntegrationTimeout(parsed) + } + } + // Load saved exercises selection + const savedSuite = localStorage.getItem("evals-suite") + if (savedSuite === "partial") { + setValue("suite", "partial") + const savedExercises = localStorage.getItem("evals-exercises") + if (savedExercises) { + try { + const parsed = JSON.parse(savedExercises) as string[] + if (Array.isArray(parsed)) { + setSelectedExercises(parsed) + setValue("exercises", parsed) + } + } catch { + // Invalid JSON, ignore } - - const { id } = await createRun({ ...values, systemPrompt }) - router.push(`/runs/${id}`) - } catch (e) { - toast.error(e instanceof Error ? 
e.message : "An unknown error occurred.") } + } + }, [setValue]) + + // Extract unique languages from exercises + const languages = useMemo(() => { + if (!exercises.data) return [] + const langs = new Set() + for (const path of exercises.data) { + const lang = path.split("/")[0] + if (lang) langs.add(lang) + } + return Array.from(langs).sort() + }, [exercises.data]) + + // Get exercises for a specific language + const getExercisesForLanguage = useCallback( + (lang: string) => { + if (!exercises.data) return [] + return exercises.data.filter((path) => path.startsWith(`${lang}/`)) }, - [mode, model, router, systemPrompt], + [exercises.data], ) - const onFilterModels = useCallback( - (value: string, search: string) => { - if (modelSearchValueRef.current !== search) { - modelSearchValueRef.current = search - modelSearchResultsRef.current.clear() - - for (const { - obj: { id }, - score, - } of fuzzysort.go(search, models.data || [], { - key: "name", - })) { - modelSearchResultsRef.current.set(id, score) + // Toggle all exercises for a language + const toggleLanguage = useCallback( + (lang: string) => { + const langExercises = getExercisesForLanguage(lang) + const allSelected = langExercises.every((ex) => selectedExercises.includes(ex)) + + let newSelected: string[] + if (allSelected) { + // Remove all exercises for this language + newSelected = selectedExercises.filter((ex) => !ex.startsWith(`${lang}/`)) + } else { + // Add all exercises for this language (avoiding duplicates) + const existing = new Set(selectedExercises) + for (const ex of langExercises) { + existing.add(ex) } + newSelected = Array.from(existing) } - return modelSearchResultsRef.current.get(value) ?? 
0 + setSelectedExercises(newSelected) + setValue("exercises", newSelected) + localStorage.setItem("evals-exercises", JSON.stringify(newSelected)) }, - [models.data], + [getExercisesForLanguage, selectedExercises, setValue], ) - const onSelectModel = useCallback( - (model: string) => { + // Check if all exercises for a language are selected + const isLanguageSelected = useCallback( + (lang: string) => { + const langExercises = getExercisesForLanguage(lang) + return langExercises.length > 0 && langExercises.every((ex) => selectedExercises.includes(ex)) + }, + [getExercisesForLanguage, selectedExercises], + ) + + // Check if some (but not all) exercises for a language are selected + const isLanguagePartiallySelected = useCallback( + (lang: string) => { + const langExercises = getExercisesForLanguage(lang) + const selectedCount = langExercises.filter((ex) => selectedExercises.includes(ex)).length + return selectedCount > 0 && selectedCount < langExercises.length + }, + [getExercisesForLanguage, selectedExercises], + ) + + // Add a new model selection + const addModelSelection = useCallback(() => { + setModelSelections((prev) => [...prev, { id: crypto.randomUUID(), model: "", popoverOpen: false }]) + }, []) + + // Remove a model selection + const removeModelSelection = useCallback((id: string) => { + setModelSelections((prev) => prev.filter((s) => s.id !== id)) + }, []) + + // Update a model selection + const updateModelSelection = useCallback( + (id: string, model: string) => { + setModelSelections((prev) => prev.map((s) => (s.id === id ? { ...s, model, popoverOpen: false } : s))) + // Also set the form model field for validation (use first non-empty model) setValue("model", model) - setModelPopoverOpen(false) }, [setValue], ) + // Toggle popover for a model selection + const toggleModelPopover = useCallback((id: string, open: boolean) => { + setModelSelections((prev) => prev.map((s) => (s.id === id ? 
{ ...s, popoverOpen: open } : s))) + }, []) + + // Add a new config selection + const addConfigSelection = useCallback(() => { + setConfigSelections((prev) => [...prev, { id: crypto.randomUUID(), configName: "", popoverOpen: false }]) + }, []) + + // Remove a config selection + const removeConfigSelection = useCallback((id: string) => { + setConfigSelections((prev) => prev.filter((s) => s.id !== id)) + }, []) + + // Update a config selection + const updateConfigSelection = useCallback( + (id: string, configName: string) => { + setConfigSelections((prev) => prev.map((s) => (s.id === id ? { ...s, configName, popoverOpen: false } : s))) + // Also update the form settings for the first config (for validation) + if (importedSettings) { + const providerSettings = importedSettings.apiConfigs[configName] ?? {} + setValue("model", getModelId(providerSettings) ?? "") + setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...importedSettings.globalSettings }) + } + }, + [importedSettings, setValue], + ) + + // Toggle popover for a config selection + const toggleConfigPopover = useCallback((id: string, open: boolean) => { + setConfigSelections((prev) => prev.map((s) => (s.id === id ? 
{ ...s, popoverOpen: open } : s))) + }, []) + + const onSubmit = useCallback( + async (values: CreateRun) => { + try { + // Validate jobToken for Roo Code Cloud provider + if (provider === "roo" && !values.jobToken?.trim()) { + toast.error("Roo Code Cloud Token is required") + return + } + + // Determine which selections to use based on provider + const selectionsToLaunch: { model: string; configName?: string }[] = [] + + if (provider === "other") { + // For import mode, use config selections + for (const config of configSelections) { + if (config.configName) { + selectionsToLaunch.push({ model: "", configName: config.configName }) + } + } + } else { + // For openrouter/roo, use model selections + for (const selection of modelSelections) { + if (selection.model) { + selectionsToLaunch.push({ model: selection.model }) + } + } + } + + if (selectionsToLaunch.length === 0) { + toast.error("Please select at least one model or config") + return + } + + // Show launching toast + const totalRuns = selectionsToLaunch.length + toast.info(totalRuns > 1 ? `Launching ${totalRuns} runs (every 20 seconds)...` : "Launching run...") + + // Launch runs with 20-second delay between each + for (let i = 0; i < selectionsToLaunch.length; i++) { + const selection = selectionsToLaunch[i]! + + // Wait 20 seconds between runs (except for the first one) + if (i > 0) { + await new Promise((resolve) => setTimeout(resolve, 20000)) + } + + const runValues = { ...values } + + if (provider === "openrouter") { + runValues.model = selection.model + runValues.settings = { + ...(runValues.settings || {}), + apiProvider: "openrouter", + openRouterModelId: selection.model, + toolProtocol: useNativeToolProtocol ? 
"native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } + } else if (provider === "roo") { + runValues.model = selection.model + runValues.settings = { + ...(runValues.settings || {}), + apiProvider: "roo", + apiModelId: selection.model, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } + } else if (provider === "other" && selection.configName && importedSettings) { + const providerSettings = importedSettings.apiConfigs[selection.configName] ?? {} + runValues.model = getModelId(providerSettings) ?? "" + runValues.settings = { + ...EVALS_SETTINGS, + ...providerSettings, + ...importedSettings.globalSettings, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } + } + + try { + await createRun(runValues) + toast.success(`Run ${i + 1}/${totalRuns} launched`) + } catch (e) { + toast.error(`Run ${i + 1} failed: ${e instanceof Error ? e.message : "Unknown error"}`) + } + } + + // Navigate back to main evals UI + router.push("/") + } catch (e) { + toast.error(e instanceof Error ? e.message : "An unknown error occurred.") + } + }, + [ + provider, + modelSelections, + configSelections, + importedSettings, + router, + useNativeToolProtocol, + commandExecutionTimeout, + terminalShellIntegrationTimeout, + ], + ) + const onImportSettings = useCallback( async (event: React.ChangeEvent) => { const file = event.target.files?.[0] @@ -163,11 +442,21 @@ export function NewRun() { }) .parse(JSON.parse(await file.text())) - const providerSettings = providerProfiles.apiConfigs[providerProfiles.currentApiConfigName] ?? 
{} + // Store all imported configs for user selection + setImportedSettings({ + apiConfigs: providerProfiles.apiConfigs, + globalSettings, + currentApiConfigName: providerProfiles.currentApiConfigName, + }) + + // Default to the current config for the first selection + const defaultConfigName = providerProfiles.currentApiConfigName + setConfigSelections([{ id: crypto.randomUUID(), configName: defaultConfigName, popoverOpen: false }]) + // Apply the default config + const providerSettings = providerProfiles.apiConfigs[defaultConfigName] ?? {} setValue("model", getModelId(providerSettings) ?? "") setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...globalSettings }) - setMode("settings") event.target.value = "" } catch (e) { @@ -184,115 +473,291 @@ export function NewRun() {
-
- {mode === "openrouter" && ( - ( - - - - + + + {importedSettings && Object.keys(importedSettings.apiConfigs).length > 0 && ( +
+ + {configSelections.map((selection, index) => ( +
+ + toggleConfigPopover(selection.id, open) + }> + + + + + + + + No config found. + + {Object.keys( + importedSettings.apiConfigs, + ).map((configName) => ( + + updateConfigSelection( + selection.id, + configName, + ) + }> + {configName} + {configName === + importedSettings.currentApiConfigName && ( + + (default) + + )} + + + ))} + + + + + + {index === configSelections.length - 1 ? ( + + ) : ( + + )}
- - - - - - + )} + +
+ +
+ +
+
- - - - {settings && ( - + {settings && ( + + )} +
+ ) : ( <> -
- -
- Imported valid Roo Code settings. Showing differences from default - settings. +
+ {modelSelections.map((selection, index) => ( +
+ toggleModelPopover(selection.id, open)}> + + + + + + + + No model found. + + {models?.map(({ id, name }) => ( + + updateModelSelection( + selection.id, + id, + ) + }> + {name} + + + ))} + + + + + + {index === modelSelections.length - 1 ? ( + + ) : ( + + )} +
+ ))} +
+ +
+ +
+
- - - )} - - + )} - + + + )} + /> - - - Override System Prompt -