From 7da1c0fa5aba12d81369afe89fc6443c0b652cd4 Mon Sep 17 00:00:00 2001 From: Tyler Longwell Date: Thu, 12 Feb 2026 12:55:58 -0500 Subject: [PATCH] feat: improve search_code for LLM agent consumption - Enable TextMatch on search_code (Accept: text-match+json header) so results include code fragment context without a second API call - Fix search_code tool description: document only legacy REST API qualifiers, warn about unsupported new-search syntax (content:, symbol:, is:, NOT, OR, regex, globs) that silently fails - Add MinimalCodeResult/MinimalCodeSearchResult structs to strip ~2KB Repository bloat per result (~6x token reduction) - Rename 'q' -> 'query' param in SearchCode and SearchUsers for consistency with SearchRepositories (ref: upstream #221) - Improve search_repositories and search_users descriptions with examples - Add tests: TextMatchFragments, ZeroResults, TextMatchHeaderSent (16/16 pass, go build/vet/gofmt clean) --- pkg/github/search.go | 103 ++++++++++++++++++--- pkg/github/search_test.go | 186 +++++++++++++++++++++++++++++++++----- 2 files changed, 256 insertions(+), 33 deletions(-) diff --git a/pkg/github/search.go b/pkg/github/search.go index 1130e9cf6..63fc3a88e 100644 --- a/pkg/github/search.go +++ b/pkg/github/search.go @@ -12,10 +12,34 @@ import ( "github.com/mark3labs/mcp-go/server" ) +// MinimalCodeResult is a compact representation of a code search result +// optimized for LLM consumption. +type MinimalCodeResult struct { + Name string `json:"name"` + Path string `json:"path"` + HTMLURL string `json:"html_url"` + Repository string `json:"repository"` // "owner/repo" format + TextMatches []string `json:"text_matches,omitempty"` // code fragments +} + +// MinimalCodeSearchResult is a compact representation of a code search response. +type MinimalCodeSearchResult struct { + TotalCount int `json:"total_count"` + IncompleteResults bool `json:"incomplete_results"` + Items []MinimalCodeResult `json:"items"` +} + // SearchRepositories creates a tool to search for GitHub repositories. func SearchRepositories(getClient GetClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { return mcp.NewTool("search_repositories", - mcp.WithDescription(t("TOOL_SEARCH_REPOSITORIES_DESCRIPTION", "Search for GitHub repositories")), + mcp.WithDescription(t("TOOL_SEARCH_REPOSITORIES_DESCRIPTION", `Search for GitHub repositories by name, description, topics, or other metadata. + +Useful for discovering projects, finding repos by topic, or locating specific repositories. + +Examples: +- "machine learning" language:python stars:>100 +- topic:kubernetes org:myorg +- "payment" in:name org:myorg`)), mcp.WithToolAnnotation(mcp.ToolAnnotation{ Title: t("TOOL_SEARCH_REPOSITORIES_USER_TITLE", "Search repositories"), ReadOnlyHint: toBoolPtr(true), @@ -82,14 +106,40 @@ func SearchRepositories(getClient GetClientFn, t translations.TranslationHelperF // SearchCode creates a tool to search for code across GitHub repositories. func SearchCode(getClient GetClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { return mcp.NewTool("search_code", - mcp.WithDescription(t("TOOL_SEARCH_CODE_DESCRIPTION", "Search for code across GitHub repositories")), + mcp.WithDescription(t("TOOL_SEARCH_CODE_DESCRIPTION", `Search for code across GitHub repositories using the REST API. + +IMPORTANT: This tool uses GitHub's legacy code search API. Only the qualifiers listed below are supported. +Do NOT use: content:, symbol:, is:, NOT, OR, parentheses, /regex/, or glob patterns — they will silently fail. + +Supported qualifiers: +- org:NAME or user:NAME — scope to an organization (ALWAYS include this for broad searches) +- repo:OWNER/NAME — scope to a specific repository +- language:NAME — filter by programming language +- path:DIRECTORY — filter by directory path (basic only, no glob) +- filename:NAME — find files by name +- extension:EXT — find files by extension +- in:file or in:path — search file contents vs file paths +- size:N — filter by file size (e.g. size:>1000) +- "exact phrase" — quoted exact string match +- fork:true — include results from forked repositories +- Multiple terms are AND'd automatically + +Rate limit: 10 searches per minute. Plan your query carefully before searching. + +Examples: +- "class AuthHandler" language:python org:myorg +- filename:Dockerfile org:myorg +- "import express" extension:ts repo:owner/repo +- path:src/api "middleware" language:go org:myorg + +After finding files, use get_file_contents to read the full source code.`)), mcp.WithToolAnnotation(mcp.ToolAnnotation{ Title: t("TOOL_SEARCH_CODE_USER_TITLE", "Search code"), ReadOnlyHint: toBoolPtr(true), }), - mcp.WithString("q", + mcp.WithString("query", mcp.Required(), - mcp.Description("Search query using GitHub code search syntax"), + mcp.Description("Search query using GitHub code search qualifiers. Always scope with org: or repo: for best results."), ), mcp.WithString("sort", mcp.Description("Sort field ('indexed' only)"), @@ -101,7 +151,7 @@ func SearchCode(getClient GetClientFn, t translations.TranslationHelperFunc) (to WithPagination(), ), func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - query, err := requiredParam[string](request, "q") + query, err := requiredParam[string](request, "query") if err != nil { return mcp.NewToolResultError(err.Error()), nil } @@ -128,8 +178,9 @@ func SearchCode(getClient GetClientFn, t translations.TranslationHelperFunc) (to } opts := &github.SearchOptions{ - Sort: sort, - Order: order, + Sort: sort, + Order: order, + TextMatch: true, ListOptions: github.ListOptions{ PerPage: pagination.perPage, Page: pagination.page, @@ -155,7 +206,31 @@ func SearchCode(getClient GetClientFn, t translations.TranslationHelperFunc) (to return mcp.NewToolResultError(fmt.Sprintf("failed to search code: %s", string(body))), nil } - r, err := json.Marshal(result) + // Format as minimal results for LLM consumption + minimalResults := make([]MinimalCodeResult, 0, len(result.CodeResults)) + for _, cr := range result.CodeResults { + mr := MinimalCodeResult{ + Name: cr.GetName(), + Path: cr.GetPath(), + HTMLURL: cr.GetHTMLURL(), + Repository: cr.GetRepository().GetFullName(), + } + // Extract text match fragments + for _, tm := range cr.TextMatches { + if tm.Fragment != nil { + mr.TextMatches = append(mr.TextMatches, *tm.Fragment) + } + } + minimalResults = append(minimalResults, mr) + } + + minimalResult := MinimalCodeSearchResult{ + TotalCount: result.GetTotal(), + IncompleteResults: result.GetIncompleteResults(), + Items: minimalResults, + } + + r, err := json.Marshal(minimalResult) if err != nil { return nil, fmt.Errorf("failed to marshal response: %w", err) } @@ -167,14 +242,18 @@ func SearchCode(getClient GetClientFn, t translations.TranslationHelperFunc) (to // SearchUsers creates a tool to search for GitHub users. func SearchUsers(getClient GetClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { return mcp.NewTool("search_users", - mcp.WithDescription(t("TOOL_SEARCH_USERS_DESCRIPTION", "Search for GitHub users")), + mcp.WithDescription(t("TOOL_SEARCH_USERS_DESCRIPTION", `Search for GitHub users by username, name, location, or other profile information. + +Examples: +- "john" location:seattle +- followers:>100 language:go`)), mcp.WithToolAnnotation(mcp.ToolAnnotation{ Title: t("TOOL_SEARCH_USERS_USER_TITLE", "Search users"), ReadOnlyHint: toBoolPtr(true), }), - mcp.WithString("q", + mcp.WithString("query", mcp.Required(), - mcp.Description("Search query using GitHub users search syntax"), + mcp.Description("Search query for GitHub users. Examples: 'location:seattle', 'followers:>100'."), ), mcp.WithString("sort", mcp.Description("Sort field by category"), @@ -187,7 +266,7 @@ func SearchUsers(getClient GetClientFn, t translations.TranslationHelperFunc) (t WithPagination(), ), func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - query, err := requiredParam[string](request, "q") + query, err := requiredParam[string](request, "query") if err != nil { return mcp.NewToolResultError(err.Error()), nil } diff --git a/pkg/github/search_test.go b/pkg/github/search_test.go index b61518e47..cd1916458 100644 --- a/pkg/github/search_test.go +++ b/pkg/github/search_test.go @@ -167,12 +167,12 @@ func Test_SearchCode(t *testing.T) { assert.Equal(t, "search_code", tool.Name) assert.NotEmpty(t, tool.Description) - assert.Contains(t, tool.InputSchema.Properties, "q") + assert.Contains(t, tool.InputSchema.Properties, "query") assert.Contains(t, tool.InputSchema.Properties, "sort") assert.Contains(t, tool.InputSchema.Properties, "order") assert.Contains(t, tool.InputSchema.Properties, "perPage") assert.Contains(t, tool.InputSchema.Properties, "page") - assert.ElementsMatch(t, tool.InputSchema.Required, []string{"q"}) + assert.ElementsMatch(t, tool.InputSchema.Required, []string{"query"}) // Setup mock search results mockSearchResult := &github.CodeSearchResult{ @@ -221,7 +221,7 @@ func Test_SearchCode(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "fmt.Println language:go", + "query": "fmt.Println language:go", "sort": "indexed", "order": "desc", "page": float64(1), @@ -245,7 +245,7 @@ func Test_SearchCode(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "fmt.Println language:go", + "query": "fmt.Println language:go", }, expectError: false, expectedResult: mockSearchResult, @@ -262,7 +262,7 @@ func Test_SearchCode(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "invalid:query", + "query": "invalid:query", }, expectError: true, expectedErrMsg: "failed to search code", @@ -293,24 +293,90 @@ func Test_SearchCode(t *testing.T) { // Parse the result and get the text content if no error textContent := getTextResult(t, result) - // Unmarshal and verify the result - var returnedResult github.CodeSearchResult + // Unmarshal into MinimalCodeSearchResult (not raw CodeSearchResult) + var returnedResult MinimalCodeSearchResult err = json.Unmarshal([]byte(textContent.Text), &returnedResult) require.NoError(t, err) - assert.Equal(t, *tc.expectedResult.Total, *returnedResult.Total) - assert.Equal(t, *tc.expectedResult.IncompleteResults, *returnedResult.IncompleteResults) - assert.Len(t, returnedResult.CodeResults, len(tc.expectedResult.CodeResults)) - for i, code := range returnedResult.CodeResults { - assert.Equal(t, *tc.expectedResult.CodeResults[i].Name, *code.Name) - assert.Equal(t, *tc.expectedResult.CodeResults[i].Path, *code.Path) - assert.Equal(t, *tc.expectedResult.CodeResults[i].SHA, *code.SHA) - assert.Equal(t, *tc.expectedResult.CodeResults[i].HTMLURL, *code.HTMLURL) - assert.Equal(t, *tc.expectedResult.CodeResults[i].Repository.FullName, *code.Repository.FullName) + assert.Equal(t, *tc.expectedResult.Total, returnedResult.TotalCount) + assert.Equal(t, *tc.expectedResult.IncompleteResults, returnedResult.IncompleteResults) + assert.Len(t, returnedResult.Items, len(tc.expectedResult.CodeResults)) + for i, item := range returnedResult.Items { + assert.Equal(t, tc.expectedResult.CodeResults[i].GetName(), item.Name) + assert.Equal(t, tc.expectedResult.CodeResults[i].GetPath(), item.Path) + assert.Equal(t, tc.expectedResult.CodeResults[i].GetHTMLURL(), item.HTMLURL) + assert.Equal(t, tc.expectedResult.CodeResults[i].GetRepository().GetFullName(), item.Repository) } }) } } +func Test_SearchCode_TextMatchFragments(t *testing.T) { + // Setup mock search results with TextMatches populated + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("main.go"), + Path: github.Ptr("cmd/main.go"), + SHA: github.Ptr("abc123"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/cmd/main.go"), + Repository: &github.Repository{Name: github.Ptr("repo"), FullName: github.Ptr("owner/repo")}, + TextMatches: []*github.TextMatch{ + { + Fragment: github.Ptr("func main() {\n\tfmt.Println(\"hello world\")\n}"), + }, + { + Fragment: github.Ptr("import \"fmt\""), + }, + }, + }, + }, + } + + mockedClient := mock.NewMockedHTTPClient( + mock.WithRequestMatchHandler( + mock.GetSearchCode, + expectQueryParams(t, map[string]string{ + "q": "fmt.Println language:go", + "page": "1", + "per_page": "30", + }).andThen( + mockResponse(t, http.StatusOK, mockSearchResult), + ), + ), + ) + + client := github.NewClient(mockedClient) + _, handler := SearchCode(stubGetClientFn(client), translations.NullTranslationHelper) + + request := createMCPRequest(map[string]interface{}{ + "query": "fmt.Println language:go", + }) + + result, err := handler(context.Background(), request) + require.NoError(t, err) + + textContent := getTextResult(t, result) + + var returnedResult MinimalCodeSearchResult + err = json.Unmarshal([]byte(textContent.Text), &returnedResult) + require.NoError(t, err) + + assert.Equal(t, 1, returnedResult.TotalCount) + require.Len(t, returnedResult.Items, 1) + + item := returnedResult.Items[0] + assert.Equal(t, "main.go", item.Name) + assert.Equal(t, "cmd/main.go", item.Path) + assert.Equal(t, "owner/repo", item.Repository) + + // Verify text match fragments are included + require.Len(t, item.TextMatches, 2) + assert.Equal(t, "func main() {\n\tfmt.Println(\"hello world\")\n}", item.TextMatches[0]) + assert.Equal(t, "import \"fmt\"", item.TextMatches[1]) +} + func Test_SearchUsers(t *testing.T) { // Verify tool definition once mockClient := github.NewClient(nil) @@ -318,12 +384,12 @@ func Test_SearchUsers(t *testing.T) { assert.Equal(t, "search_users", tool.Name) assert.NotEmpty(t, tool.Description) - assert.Contains(t, tool.InputSchema.Properties, "q") + assert.Contains(t, tool.InputSchema.Properties, "query") assert.Contains(t, tool.InputSchema.Properties, "sort") assert.Contains(t, tool.InputSchema.Properties, "order") assert.Contains(t, tool.InputSchema.Properties, "perPage") assert.Contains(t, tool.InputSchema.Properties, "page") - assert.ElementsMatch(t, tool.InputSchema.Required, []string{"q"}) + assert.ElementsMatch(t, tool.InputSchema.Required, []string{"query"}) // Setup mock search results mockSearchResult := &github.UsersSearchResult{ @@ -376,7 +442,7 @@ func Test_SearchUsers(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "location:finland language:go", + "query": "location:finland language:go", "sort": "followers", "order": "desc", "page": float64(1), @@ -400,7 +466,7 @@ func Test_SearchUsers(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "location:finland language:go", + "query": "location:finland language:go", }, expectError: false, expectedResult: mockSearchResult, @@ -417,7 +483,7 @@ func Test_SearchUsers(t *testing.T) { ), ), requestArgs: map[string]interface{}{ - "q": "invalid:query", + "query": "invalid:query", }, expectError: true, expectedErrMsg: "failed to search users", @@ -468,3 +534,81 @@ func Test_SearchUsers(t *testing.T) { }) } } + +func Test_SearchCode_ZeroResults(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(0), + IncompleteResults: github.Ptr(false), + CodeResults: nil, + } + + client := github.NewClient(mock.NewMockedHTTPClient( + mock.WithRequestMatchHandler( + mock.GetSearchCode, + mockResponse(t, http.StatusOK, mockSearchResult), + ), + )) + _, handler := SearchCode(stubGetClientFn(client), translations.NullTranslationHelper) + + request := createMCPRequest(map[string]interface{}{ + "query": "nonexistent_symbol_xyz org:squareup", + }) + + result, err := handler(context.Background(), request) + require.NoError(t, err) + + textContent := getTextResult(t, result) + + var returnedResult MinimalCodeSearchResult + err = json.Unmarshal([]byte(textContent.Text), &returnedResult) + require.NoError(t, err) + assert.Equal(t, 0, returnedResult.TotalCount) + assert.False(t, returnedResult.IncompleteResults) + assert.Empty(t, returnedResult.Items) +} + +func Test_SearchCode_TextMatchHeaderSent(t *testing.T) { + // Verify that the Accept header includes the text-match media type + headerChecked := false + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("main.go"), + Path: github.Ptr("cmd/main.go"), + SHA: github.Ptr("abc123"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/cmd/main.go"), + Repository: &github.Repository{FullName: github.Ptr("owner/repo")}, + }, + }, + } + + client := github.NewClient(mock.NewMockedHTTPClient( + mock.WithRequestMatchHandler( + mock.GetSearchCode, + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Assert the Accept header contains text-match media type + acceptHeader := r.Header.Get("Accept") + assert.Contains(t, acceptHeader, "application/vnd.github.v3.text-match+json", + "SearchCode must request text-match media type") + headerChecked = true + + // Return the mock response + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(mockSearchResult) + }), + ), + )) + _, handler := SearchCode(stubGetClientFn(client), translations.NullTranslationHelper) + + request := createMCPRequest(map[string]interface{}{ + "query": "main org:owner", + }) + + result, err := handler(context.Background(), request) + require.NoError(t, err) + require.NotNil(t, result) + assert.True(t, headerChecked, "Accept header check must have been executed") +}