Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions chat-history.jsonl

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions docs/macos-window-screenshot-feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# macOS Window-Specific Screenshot Feature

## Summary

Added the ability to capture screenshots of specific windows and applications on macOS, in addition to the existing full-screen capture functionality.

## New Features

### For LLM/AI Tools:

1. **TakeScreenshotOfWindowWithTitle(title)** - Capture a window by title (case-insensitive partial match)
   - Example: `TakeScreenshotOfWindowWithTitle("cycod")`
   - Returns the screenshot, or a helpful error message if multiple windows match

2. **TakeScreenshotOfApp(appName)** - Capture a window by application name (case-insensitive partial match)
   - Example: `TakeScreenshotOfApp("Warp")`
   - Returns the screenshot, or a helpful error message if multiple windows match

3. **TakeScreenshotOfDisplay(displayNumber)** - Capture specific display
- Example: `TakeScreenshotOfDisplay(2)` for secondary monitor

4. **ListWindows()** - Enumerate all visible windows
- Returns JSON array with window metadata
- Useful for exploration before capturing

5. **TakeScreenshotOfWindow(windowId)** - Advanced: capture by window ID
- Requires calling ListWindows() first to get IDs

### Implementation Details:

- **Platform**: macOS only (guarded with `#if OSX`)
- **Method**: Hybrid approach:
  - Uses Core Graphics `CGWindowListCopyWindowInfo` for window enumeration (no permissions required)
  - Uses `screencapture -l <windowid>` for actual capture (no permissions required)
- **Matching**: Case-insensitive, partial matching for app names and titles
- **Filtering**: Only shows normal application windows (layer 0) with titles
- **Error Handling**: Returns helpful messages when multiple windows match

## Files Modified:

1. **src/cycod/Helpers/WindowInfo.cs** (NEW)
- Data class for window metadata
- Contains: WindowId, ProcessId, ApplicationName, WindowTitle, Bounds, etc.

2. **src/cycod/Helpers/ScreenshotHelper.cs** (MODIFIED)
- Added window enumeration methods
- Added new screenshot capture methods
- Added Core Foundation P/Invoke for window APIs

3. **src/cycod/FunctionCallingTools/ScreenshotHelperFunctions.cs** (MODIFIED)
- Added AI tool wrappers for new methods
- Returns screenshots as DataContent or error messages
- Returns window list as JSON string

## Usage Examples (from LLM perspective):

```
User: "Take a screenshot of my Warp terminal"
AI: TakeScreenshotOfApp("Warp")

User: "Take a screenshot of the window with 'cycod' in the title"
AI: TakeScreenshotOfWindowWithTitle("cycod")

User: "What windows are open?"
AI: ListWindows()

User: "Take a screenshot of my second monitor"
AI: TakeScreenshotOfDisplay(2)
```

## Benefits:

- ✅ No Screen Recording permissions required
- ✅ LLM can target specific windows easily
- ✅ Simple one-call operations for common use cases
- ✅ Advanced APIs available for power users
- ✅ Helpful error messages when multiple matches exist
- ✅ Clean, maintainable code with proper platform guards

## Testing:

Build succeeded with 0 warnings, 0 errors.

To test manually:
1. Run: `./src/cycod/bin/Debug/net9.0/cycod chat`
2. Ask the AI to list windows: "List all windows"
3. Ask the AI to screenshot specific app: "Take a screenshot of Warp"
156 changes: 152 additions & 4 deletions src/cycod/FunctionCallingTools/ScreenshotHelperFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ public ScreenshotHelperFunctions(ChatCommand chatCommand)
_chatCommand = chatCommand;
}

#if WINDOWS
[Description("Take a screenshot of the primary screen and add it to the conversation. The screenshot will be included in the next message exchange. Only works on Windows.")]
#if WINDOWS || OSX
[Description("Take a screenshot of the primary screen and add it to the conversation. The screenshot will be included in the next message exchange. Works on Windows and macOS.")]
public object TakeScreenshot()
{
// Check platform support
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && !RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return ScreenshotHelper.GetPlatformErrorMessage();
}
Expand All @@ -27,7 +27,13 @@ public object TakeScreenshot()
// Capture screenshot
var filePath = ScreenshotHelper.TakeScreenshot();
var fileExists = FileHelpers.FileExists(filePath);
if (!fileExists) return "Failed to capture screenshot. Please check that the display is accessible.";
if (!fileExists)
{
// Use macOS-specific error message if on macOS, otherwise generic message
return RuntimeInformation.IsOSPlatform(OSPlatform.OSX)
? ScreenshotHelper.GetMacOSPermissionErrorMessage()
: "Failed to capture screenshot. Please check that the display is accessible.";
}

// Load the screenshot and return as DataContent for immediate inclusion
try
Expand All @@ -49,5 +55,147 @@ public object TakeScreenshot()
}
#endif

#if OSX
[Description("Take a screenshot of a window with matching title (partial match, case-insensitive). Returns the screenshot file path or an error message. Example: title='cycod' or title='Microsoft Edge'.")]
public object TakeScreenshotOfWindowWithTitle(string title)
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return "Window-specific screenshots are only available on macOS.";
}

try
{
var result = ScreenshotHelper.TakeScreenshotOfWindowWithTitle(title);

// If result is an existing file path, load and return as DataContent
if (!string.IsNullOrEmpty(result) && File.Exists(result))
{
var imageBytes = File.ReadAllBytes(result);
var mediaType = ImageResolver.GetMediaTypeFromFileExtension(result);
return new DataContent(imageBytes, mediaType);
}

// Otherwise it's an error message
return result;
}
catch (Exception ex)
{
return $"Error capturing screenshot by title: {ex.Message}";
}
}

[Description("Take a screenshot of a window from the specified application (partial match, case-insensitive). Returns the screenshot file path or an error message. Example: appName='Warp' or appName='Code'.")]
public object TakeScreenshotOfApp(string appName)
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return "Window-specific screenshots are only available on macOS.";
}

try
{
var result = ScreenshotHelper.TakeScreenshotOfApp(appName);

// If result starts with '/', it's a file path - load and return as DataContent
if (result.StartsWith('/') || result.StartsWith(Path.GetTempPath()))
{
var imageBytes = File.ReadAllBytes(result);
var mediaType = ImageResolver.GetMediaTypeFromFileExtension(result);
return new DataContent(imageBytes, mediaType);
}

// Otherwise it's an error message
return result;
}
catch (Exception ex)
{
return $"Error capturing screenshot by app: {ex.Message}";
}
}

[Description("Take a screenshot of a specific display. displayNumber: 1 for main display, 2 for secondary, etc. Returns the screenshot or an error message.")]
public object TakeScreenshotOfDisplay(int displayNumber)
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return "Display-specific screenshots are only available on macOS.";
}

try
{
var filePath = ScreenshotHelper.TakeScreenshotOfDisplay(displayNumber);
if (filePath == null)
{
return $"Failed to capture screenshot of display {displayNumber}. Please check that the display exists.";
}

var imageBytes = File.ReadAllBytes(filePath);
var mediaType = ImageResolver.GetMediaTypeFromFileExtension(filePath);
return new DataContent(imageBytes, mediaType);
}
catch (Exception ex)
{
return $"Error capturing screenshot of display {displayNumber}: {ex.Message}";
}
}

[Description("List all visible application windows with their metadata (window ID, app name, title, position, size). Returns a JSON array of window information. Useful for finding windows before capturing them.")]
public object ListWindows()
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return "Window enumeration is only available on macOS.";
}

try
{
var windows = ScreenshotHelper.EnumerateWindows();

if (windows.Count == 0)
{
return "No application windows found.";
}

var json = System.Text.Json.JsonSerializer.Serialize(windows, new System.Text.Json.JsonSerializerOptions
{
WriteIndented = true
});

return json;
}
catch (Exception ex)
{
return $"Error listing windows: {ex.Message}";
}
}

[Description("Take a screenshot of a specific window by ID. Use ListWindows() first to get window IDs. Returns the screenshot or an error message. This is an advanced method - prefer TakeScreenshotOfWindowWithTitle or TakeScreenshotOfApp for simpler use.")]
public object TakeScreenshotOfWindow(int windowId)
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return "Window-specific screenshots are only available on macOS.";
}

try
{
var filePath = ScreenshotHelper.TakeScreenshotOfWindow(windowId);
if (filePath == null)
{
return $"Failed to capture screenshot of window {windowId}. Please check that the window ID is valid.";
}

var imageBytes = File.ReadAllBytes(filePath);
var mediaType = ImageResolver.GetMediaTypeFromFileExtension(filePath);
return new DataContent(imageBytes, mediaType);
}
catch (Exception ex)
{
return $"Error capturing screenshot of window {windowId}: {ex.Message}";
}
}
#endif

private readonly ChatCommand _chatCommand;
}
Loading